From 7d77bbaa9ec433a886a7f8c0f24246867bc79f5f Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 16 Nov 2023 15:37:48 -0800 Subject: [PATCH 001/129] SPARK-45959. --- .../scala/org/apache/spark/sql/Dataset.scala | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 5a372f9a0f91..47babb1b7080 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1572,7 +1572,13 @@ class Dataset[T] private[sql]( case other => other } - Project(untypedCols.map(_.named), logicalPlan) + val newProjList = untypedCols.map(_.named) + (logicalPlan, newProjList) match { + case EasilyFlattenable(flattendPlan) => flattendPlan + + case _ => Project(newProjList, logicalPlan) + } + } } @@ -4455,3 +4461,40 @@ class Dataset[T] private[sql]( toArrowBatchRdd(queryExecution.executedPlan) } } + +private[sql] object EasilyFlattenable { + def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { + val (logicalPlan, newProjList) = tuple + logicalPlan match { + case p @ Project(projList, child) => + val newlyAddedCols = newProjList.drop(projList.size) + val childAttribNames = child.output.map(_.name).toSet + val remappedAttribsInProj = projList.collect { + case Alias(expr, name) if childAttribNames.contains(name) && !expr.isInstanceOf[Attribute] + => name + }.toSet + + val canBeFlattend = newlyAddedCols.forall(ne => ne match { + // this is case of duplicating column, for now do not handle + case Alias(_: Attribute, _) => false + + case Alias(expr, _) => if (expr.references.isEmpty) { + true + } else { + val attsNameInNewExpr = expr.references.map(_.name).toSet + attsNameInNewExpr.subsetOf(childAttribNames) && + remappedAttribsInProj.forall(!attsNameInNewExpr.contains(_)) + } + + case _ => false + }) + if (canBeFlattend) { + Option(p.copy(projList ++ newlyAddedCols)) + } else { + None + } + + case _ => None + } + } +} \ No newline at end of file From 4b0c91a58cf3d324e13af0ef7ba1215c3da9d946 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 16 Nov 2023 15:52:11 -0800 Subject: [PATCH 002/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 47babb1b7080..22dc242261fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4497,4 +4497,4 @@ private[sql] object EasilyFlattenable { case _ => None } } -} \ No newline at end of file +} From b3c529cf04202ab488d01ec7c8c85020527aa3ca Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 16 Nov 2023 17:21:24 -0800 Subject: [PATCH 003/129] SPARK-45959. 
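This series teaches Dataset.withColumns to fold a new projection into an existing Project node instead of stacking a second Project on top of it, provided the new expressions only reference child attributes that the inner projection did not redefine. A minimal, hypothetical demo of the plan shape being targeted (the session setup and column names are illustrative, not part of the patch):

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.col

    object FlattenDemo extends App {
      val spark = SparkSession.builder().master("local[*]").appName("flatten-demo").getOrCreate()
      val base = spark.range(5).toDF("a")
      // Naively, each withColumn adds one Project node per call:
      //   Project [a, b, (a * 2) AS c]
      //     +- Project [a, (a + 1) AS b]
      //        +- Range (0, 5)
      // After the EasilyFlattenable rewrite the second call folds into the first:
      //   Project [a, (a + 1) AS b, (a * 2) AS c]
      //     +- Range (0, 5)
      val df = base.withColumn("b", col("a") + 1).withColumn("c", col("a") * 2)
      df.explain(true)
      spark.stop()
    }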
--- .../scala/org/apache/spark/sql/Dataset.scala | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 22dc242261fa..d541c5a8d175 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4468,31 +4468,35 @@ private[sql] object EasilyFlattenable { logicalPlan match { case p @ Project(projList, child) => val newlyAddedCols = newProjList.drop(projList.size) - val childAttribNames = child.output.map(_.name).toSet - val remappedAttribsInProj = projList.collect { - case Alias(expr, name) if childAttribNames.contains(name) && !expr.isInstanceOf[Attribute] - => name - }.toSet - - val canBeFlattend = newlyAddedCols.forall(ne => ne match { - // this is case of duplicating column, for now do not handle - case Alias(_: Attribute, _) => false - - case Alias(expr, _) => if (expr.references.isEmpty) { - true + if (newlyAddedCols.nonEmpty) { + val childAttribNames = child.output.map(_.name).toSet + val remappedAttribsInProj = projList.collect { + case Alias(expr, name) if childAttribNames.contains(name) && + !expr.isInstanceOf[Attribute] => name + }.toSet + + val canBeFlattend = newlyAddedCols.forall(ne => ne match { + // this is case of duplicating column, for now do not handle + case Alias(_: Attribute, _) => false + + case Alias(expr, _) => if (expr.references.isEmpty) { + true + } else { + val attsNameInNewExpr = expr.references.map(_.name).toSet + attsNameInNewExpr.subsetOf(childAttribNames) && + remappedAttribsInProj.forall(!attsNameInNewExpr.contains(_)) + } + + case _ => false + }) + if (canBeFlattend) { + Option(p.copy(projList ++ newlyAddedCols)) } else { - val attsNameInNewExpr = expr.references.map(_.name).toSet - attsNameInNewExpr.subsetOf(childAttribNames) && - remappedAttribsInProj.forall(!attsNameInNewExpr.contains(_)) + None } - - case _ => false - }) - if (canBeFlattend) { - Option(p.copy(projList ++ newlyAddedCols)) - } else { - None - } + } else { + None + } case _ => None } From 948f4caf7416056dd299d6c6f685019c33345b21 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 16 Nov 2023 20:33:19 -0800 Subject: [PATCH 004/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index d541c5a8d175..f3b0c3594e8c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1574,7 +1574,7 @@ class Dataset[T] private[sql]( } val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList) match { - case EasilyFlattenable(flattendPlan) => flattendPlan + case EasilyFlattenable(flattendPlan) if !this.isStreaming => flattendPlan case _ => Project(newProjList, logicalPlan) } From c5c7b9979e0d154daa753fcd828678563f3626e0 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 16 Nov 2023 23:00:43 -0800 Subject: [PATCH 005/129] SPARK-45959. 
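When a new column's expression is merged into the inner Project, its attribute references must be rebound to the child plan's output by exprId; matching by name alone is not reliable once the analyzer has allocated fresh ids. A sketch of the rebinding this commit performs (the helper name and map parameter are assumptions, mirroring childAttribNameToExprids in the diff):

    import org.apache.spark.sql.catalyst.expressions.{AttributeReference, ExprId, NamedExpression}

    // Hypothetical helper: point every reference at the child's attribute of
    // the same name by rewriting its exprId, as the diff does via transformUp.
    def rebind(ne: NamedExpression, byName: Map[String, ExprId]): NamedExpression =
      ne.transformUp {
        case a: AttributeReference if byName.contains(a.name) => a.withExprId(byName(a.name))
      }.asInstanceOf[NamedExpression]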
--- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index f3b0c3594e8c..123e625231e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4466,10 +4466,11 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p @ Project(projList, child) => + case p @ Project(projList, child) if !child.isStreaming => val newlyAddedCols = newProjList.drop(projList.size) if (newlyAddedCols.nonEmpty) { val childAttribNames = child.output.map(_.name).toSet + val childAttribNameToExprids = child.output.map(x => x.name -> x.exprId).toMap val remappedAttribsInProj = projList.collect { case Alias(expr, name) if childAttribNames.contains(name) && !expr.isInstanceOf[Attribute] => name @@ -4490,7 +4491,11 @@ private[sql] object EasilyFlattenable { case _ => false }) if (canBeFlattend) { - Option(p.copy(projList ++ newlyAddedCols)) + // remap the newly added cols to correct attribute refs + val remappedAttribs = newlyAddedCols.map(_.transformUp { + case attr: AttributeReference => attr.withExprId(childAttribNameToExprids(attr.name)) + }) + Option(p.copy(projList ++ remappedAttribs.map(_.asInstanceOf[NamedExpression]))) } else { None } From 42816b7a824964901ef0a659ff18c3307973aa1c Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 17 Nov 2023 15:51:43 -0800 Subject: [PATCH 006/129] SPARK-45959. --- .../scala/org/apache/spark/sql/Dataset.scala | 65 +++++++++---------- .../spark/sql/execution/CacheManager.scala | 26 ++++++-- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 123e625231e8..cfdb74d9c648 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4467,41 +4467,36 @@ private[sql] object EasilyFlattenable { val (logicalPlan, newProjList) = tuple logicalPlan match { case p @ Project(projList, child) if !child.isStreaming => - val newlyAddedCols = newProjList.drop(projList.size) - if (newlyAddedCols.nonEmpty) { - val childAttribNames = child.output.map(_.name).toSet - val childAttribNameToExprids = child.output.map(x => x.name -> x.exprId).toMap - val remappedAttribsInProj = projList.collect { - case Alias(expr, name) if childAttribNames.contains(name) && - !expr.isInstanceOf[Attribute] => name - }.toSet - - val canBeFlattend = newlyAddedCols.forall(ne => ne match { - // this is case of duplicating column, for now do not handle - case Alias(_: Attribute, _) => false - - case Alias(expr, _) => if (expr.references.isEmpty) { - true - } else { - val attsNameInNewExpr = expr.references.map(_.name).toSet - attsNameInNewExpr.subsetOf(childAttribNames) && - remappedAttribsInProj.forall(!attsNameInNewExpr.contains(_)) - } - - case _ => false - }) - if (canBeFlattend) { - // remap the newly added cols to correct attribute refs - val remappedAttribs = newlyAddedCols.map(_.transformUp { - case attr: AttributeReference => attr.withExprId(childAttribNameToExprids(attr.name)) - }) - Option(p.copy(projList ++ remappedAttribs.map(_.asInstanceOf[NamedExpression]))) - } else { - None - } - } else 
{ - None - } + val currentOutputAttribs = logicalPlan.output + // In the new column list identify those Named Expressions which are just attributes and + // hence pass thru + val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition(ne => ne match { + case _: AttributeReference => true + case _ => false + }) + if (passThruAttribs.size == currentOutputAttribs.size) { + assert(tinkeredOrNewNamedExprs.nonEmpty) + val attributesTinkeredInProject = AttributeSet(projList.filter(_ match { + case _: Alias => true + case _ => false + }).map(_.toAttribute)) + val attributesTinkeredInProjectAsName = attributesTinkeredInProject.map(_.name).toSet + if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists(attr => attr match { + case u: UnresolvedAttribute => attributesTinkeredInProjectAsName.contains(u.name) + case resAttr => attributesTinkeredInProject.contains(resAttr) + } ))) { + None + } else { + val remappedNewProjList = newProjList.map(ne => ne match { + case attr: AttributeReference => projList.find(_.toAttribute == attr).get + case x => x + }) + Option(p.copy(projectList = remappedNewProjList)) + } + } else { + // for now None + None + } case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 093599af1222..089a43e64aca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -17,17 +17,15 @@ package org.apache.spark.sql.execution -import scala.collection.immutable.IndexedSeq - import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionSet, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint, SubqueryAlias, View} +import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint, SubqueryAlias, UnaryNode, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryRelation @@ -295,7 +293,25 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { /** Optionally returns cached data for the given [[LogicalPlan]]. */ def lookupCachedData(plan: LogicalPlan): Option[CachedData] = { - cachedData.find(cd => plan.sameResult(cd.plan)) + cachedData.find(cd => if (plan.sameResult(cd.plan)) { + true + } else { + (plan, cd.plan) match { + case (incomingPlan: UnaryNode, cachedPlan: UnaryNode) => + if (incomingPlan.child.sameResult(cachedPlan.child)) { + if (incomingPlan.getClass == cachedPlan.getClass) { + val incomingExprs = incomingPlan.expressions + val cachedPlanExprs = ExpressionSet(cachedPlan.expressions) + incomingExprs.forall(ex => ex.references.isEmpty || cachedPlanExprs.contains(ex)) + } else { + false + } + } else { + false + } + case _ => false + } + }) } /** Replaces segments of the given logical plan with cached versions where possible. 
*/ From 7f2945a2bb0d88b1ec11354bd1f838fb64206528 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 17 Nov 2023 16:58:15 -0800 Subject: [PATCH 007/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index cfdb74d9c648..d98c28025852 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4474,8 +4474,8 @@ private[sql] object EasilyFlattenable { case _: AttributeReference => true case _ => false }) - if (passThruAttribs.size == currentOutputAttribs.size) { - assert(tinkeredOrNewNamedExprs.nonEmpty) + // TODO: analyze the case tinkeredOrNewNamedExprs.isEmpty using DataFrameSuite + if (passThruAttribs.size == currentOutputAttribs.size && tinkeredOrNewNamedExprs.nonEmpty) { val attributesTinkeredInProject = AttributeSet(projList.filter(_ match { case _: Alias => true case _ => false From 027e0b28628ab7b6d3b103ac5881fd9ea3925435 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 18 Nov 2023 00:11:01 -0800 Subject: [PATCH 008/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index d98c28025852..ae51a40eb48f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4488,7 +4488,8 @@ private[sql] object EasilyFlattenable { None } else { val remappedNewProjList = newProjList.map(ne => ne match { - case attr: AttributeReference => projList.find(_.toAttribute == attr).get + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).get case x => x }) Option(p.copy(projectList = remappedNewProjList)) From 0ecbc30cf6e62e0f2e94340ad682d669ecafe40c Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 18 Nov 2023 17:20:02 -0800 Subject: [PATCH 009/129] SPARK-45959. 
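The pure pass-through test now compares attributes with an AttributeSet instead of only counting columns, so a projection that renames or replaces an output attribute can no longer be mistaken for a pass-through. AttributeSet membership is by exprId, which is the point of the change; an excerpt of the strengthened guard, with names as in the diff:

    import org.apache.spark.sql.catalyst.expressions.AttributeSet

    val currentOutputAttribs = AttributeSet(p.output)
    val isPurePassThru =
      passThruAttribs.size == currentOutputAttribs.size &&
        passThruAttribs.forall(currentOutputAttribs.contains)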
--- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index ae51a40eb48f..343e2b88dede 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4467,7 +4467,7 @@ private[sql] object EasilyFlattenable { val (logicalPlan, newProjList) = tuple logicalPlan match { case p @ Project(projList, child) if !child.isStreaming => - val currentOutputAttribs = logicalPlan.output + val currentOutputAttribs = AttributeSet(logicalPlan.output) // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition(ne => ne match { @@ -4475,7 +4475,8 @@ private[sql] object EasilyFlattenable { case _ => false }) // TODO: analyze the case tinkeredOrNewNamedExprs.isEmpty using DataFrameSuite - if (passThruAttribs.size == currentOutputAttribs.size && tinkeredOrNewNamedExprs.nonEmpty) { + if (passThruAttribs.size == currentOutputAttribs.size && passThruAttribs.forall( + currentOutputAttribs.contains) && tinkeredOrNewNamedExprs.nonEmpty) { val attributesTinkeredInProject = AttributeSet(projList.filter(_ match { case _: Alias => true case _ => false From 09296c487cef808d8e4021af711717f5940e1b7c Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 18 Nov 2023 19:56:39 -0800 Subject: [PATCH 010/129] SPARK-45959. --- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 343e2b88dede..6338da31dad8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4488,11 +4488,13 @@ private[sql] object EasilyFlattenable { } ))) { None } else { - val remappedNewProjList = newProjList.map(ne => ne match { + val remappedNewProjList = newProjList.map(ne => (ne transformUp { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case x => x - }) + case u: UnresolvedAttribute if u.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => + u.setTagValue(LogicalPlan.PLAN_ID_TAG, child) + u + }).asInstanceOf[NamedExpression]) Option(p.copy(projectList = remappedNewProjList)) } } else { From a6054dcd5286cd5a18ad4d8262043c29109b4a9d Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 18 Nov 2023 20:26:35 -0800 Subject: [PATCH 011/129] SPARK-45959. 
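Spark Connect stamps client-built plans with LogicalPlan.PLAN_ID_TAG so that unresolved attributes can later be bound against the subplan they were created from. When the flattening re-parents such an attribute, the tag has to travel with it; this commit copies it from the child (the typed foreach form lands in the next commit). A sketch of the intended propagation:

    // Carry the Connect plan id from the child onto the unresolved attribute
    // that will now resolve against the flattened Project:
    child.getTagValue(LogicalPlan.PLAN_ID_TAG).foreach { id =>
      u.setTagValue(LogicalPlan.PLAN_ID_TAG, id)
    }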
--- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 6338da31dad8..f7b4be2f42c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4491,9 +4491,10 @@ private[sql] object EasilyFlattenable { val remappedNewProjList = newProjList.map(ne => (ne transformUp { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case u: UnresolvedAttribute if u.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => - u.setTagValue(LogicalPlan.PLAN_ID_TAG, child) - u + case u: UnresolvedAttribute if child.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => + u.setTagValue(LogicalPlan.PLAN_ID_TAG, + child.getTagValue(LogicalPlan.PLAN_ID_TAG)) + u }).asInstanceOf[NamedExpression]) Option(p.copy(projectList = remappedNewProjList)) } From bab251801b8d166e20b5330494383925837f2f87 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 19 Nov 2023 01:51:12 -0800 Subject: [PATCH 012/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index f7b4be2f42c4..cd51b37120b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4492,9 +4492,9 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get case u: UnresolvedAttribute if child.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => - u.setTagValue(LogicalPlan.PLAN_ID_TAG, - child.getTagValue(LogicalPlan.PLAN_ID_TAG)) - u + child.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(u.setTagValue[Long]( + LogicalPlan.PLAN_ID_TAG, _)) + u }).asInstanceOf[NamedExpression]) Option(p.copy(projectList = remappedNewProjList)) } From d0267d5dfc92118a7fd9b430d2c51e02e77e1d93 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 19 Nov 2023 02:34:52 -0800 Subject: [PATCH 013/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index cd51b37120b6..efdbee8c36fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4491,10 +4491,7 @@ private[sql] object EasilyFlattenable { val remappedNewProjList = newProjList.map(ne => (ne transformUp { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case u: UnresolvedAttribute if child.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => - child.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(u.setTagValue[Long]( - LogicalPlan.PLAN_ID_TAG, _)) - u + case u: UnresolvedAttribute => projList.find(_.toAttribute.name == u.name).get }).asInstanceOf[NamedExpression]) Option(p.copy(projectList = remappedNewProjList)) } From 6d035eec0ff6846fc523a2dbb6a2a10880fdc4ab Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 19 Nov 2023 08:04:20 -0800 Subject: [PATCH 014/129] SPARK-45959. 
--- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index efdbee8c36fc..5148120179b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4491,7 +4491,10 @@ private[sql] object EasilyFlattenable { val remappedNewProjList = newProjList.map(ne => (ne transformUp { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case u: UnresolvedAttribute => projList.find(_.toAttribute.name == u.name).get + case u: UnresolvedAttribute if p.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => + p.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(u.setTagValue[Long]( + LogicalPlan.PLAN_ID_TAG, _)) + u }).asInstanceOf[NamedExpression]) Option(p.copy(projectList = remappedNewProjList)) } From 38a1d446a7316815b5c718c9fed91d139e160899 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 19 Nov 2023 11:07:49 -0800 Subject: [PATCH 015/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 5148120179b0..993c62386a81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4490,9 +4490,9 @@ private[sql] object EasilyFlattenable { } else { val remappedNewProjList = newProjList.map(ne => (ne transformUp { case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).get - case u: UnresolvedAttribute if p.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => - p.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(u.setTagValue[Long]( + _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + case u: UnresolvedAttribute if child.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => + child.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(u.setTagValue( LogicalPlan.PLAN_ID_TAG, _)) u }).asInstanceOf[NamedExpression]) From 0e5d627fde2f795a27b568c7d919f9c28c78fc37 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 19 Nov 2023 14:18:13 -0800 Subject: [PATCH 016/129] SPARK-45959. 
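An unresolved attribute that already carries a Connect plan id is pinned to a specific subplan and must not be re-bound by this rewrite, so its presence now forces the extractor to decline. An excerpt of the extended bail-out condition, as in the diff:

    case u: UnresolvedAttribute =>
      attributesTinkeredInProjectAsName.contains(u.name) ||
        u.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined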
--- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 993c62386a81..7598136d22c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4483,7 +4483,9 @@ private[sql] object EasilyFlattenable { }).map(_.toAttribute)) val attributesTinkeredInProjectAsName = attributesTinkeredInProject.map(_.name).toSet if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists(attr => attr match { - case u: UnresolvedAttribute => attributesTinkeredInProjectAsName.contains(u.name) + case u: UnresolvedAttribute => + attributesTinkeredInProjectAsName.contains(u.name) || + u.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined case resAttr => attributesTinkeredInProject.contains(resAttr) } ))) { None @@ -4491,10 +4493,6 @@ private[sql] object EasilyFlattenable { val remappedNewProjList = newProjList.map(ne => (ne transformUp { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - case u: UnresolvedAttribute if child.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => - child.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(u.setTagValue( - LogicalPlan.PLAN_ID_TAG, _)) - u }).asInstanceOf[NamedExpression]) Option(p.copy(projectList = remappedNewProjList)) } From b8575dcffca33caf8cb29d33e119c1dc524254ec Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 19 Nov 2023 18:49:50 -0800 Subject: [PATCH 017/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7598136d22c5..c7918318ccbf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4466,7 +4466,8 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p @ Project(projList, child) if !child.isStreaming => + case p @ Project(projList, child) if !child.isStreaming && + p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty => val currentOutputAttribs = AttributeSet(logicalPlan.output) // In the new column list identify those Named Expressions which are just attributes and // hence pass thru @@ -4483,9 +4484,7 @@ private[sql] object EasilyFlattenable { }).map(_.toAttribute)) val attributesTinkeredInProjectAsName = attributesTinkeredInProject.map(_.name).toSet if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists(attr => attr match { - case u: UnresolvedAttribute => - attributesTinkeredInProjectAsName.contains(u.name) || - u.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined + case u: UnresolvedAttribute => attributesTinkeredInProjectAsName.contains(u.name) case resAttr => attributesTinkeredInProject.contains(resAttr) } ))) { None From c11496bf7c708db20f65339fd282a5962cb037dd Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 20 Nov 2023 01:16:16 -0800 Subject: [PATCH 018/129] SPARK-45959. 
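The flattened Project replaces the original top-level node, so a Connect plan id stamped on that node must be transferred, or client-side resolution targeting the id would dangle. The diff does this with a tagged copy:

    val newProj = p.copy(projectList = remappedNewProjList)
    p.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach { id =>
      newProj.setTagValue[Long](LogicalPlan.PLAN_ID_TAG, id)
    }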
--- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index c7918318ccbf..40c7971f74e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -4466,8 +4466,7 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p @ Project(projList, child) if !child.isStreaming && - p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty => + case p @ Project(projList, child) if !child.isStreaming => val currentOutputAttribs = AttributeSet(logicalPlan.output) // In the new column list identify those Named Expressions which are just attributes and // hence pass thru @@ -4475,7 +4474,7 @@ private[sql] object EasilyFlattenable { case _: AttributeReference => true case _ => false }) - // TODO: analyze the case tinkeredOrNewNamedExprs.isEmpty using DataFrameSuite + if (passThruAttribs.size == currentOutputAttribs.size && passThruAttribs.forall( currentOutputAttribs.contains) && tinkeredOrNewNamedExprs.nonEmpty) { val attributesTinkeredInProject = AttributeSet(projList.filter(_ match { @@ -4493,7 +4492,10 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) }).asInstanceOf[NamedExpression]) - Option(p.copy(projectList = remappedNewProjList)) + val newProj = p.copy(projectList = remappedNewProjList) + p.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(newProj.setTagValue[Long]( + LogicalPlan.PLAN_ID_TAG, _)) + Option(newProj) } } else { // for now None From 0abf34a82c7ea6f82e4dad316ea48c8e52731ba1 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 20 Nov 2023 11:03:16 -0800 Subject: [PATCH 019/129] SPARK-45959. --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 40c7971f74e1..232bc01e5d68 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1574,7 +1574,9 @@ class Dataset[T] private[sql]( } val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList) match { - case EasilyFlattenable(flattendPlan) if !this.isStreaming => flattendPlan + case EasilyFlattenable(flattendPlan) if !this.isStreaming && + logicalPlan.collectLeaves().forall(_.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty) => + flattendPlan case _ => Project(newProjList, logicalPlan) } From ebb2738b0a57e0cda42e457597e94c0e1fd268e8 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 20 Nov 2023 12:35:50 -0800 Subject: [PATCH 020/129] SPARK-45959. 
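The extractor moves out of Dataset.scala into its own file under org.apache.spark.sql.internal, which keeps the withColumns call site a two-arm match. The idiom is a Scala extractor whose unapply takes the (plan, new projection) tuple and, on success, returns the already-rewritten plan. A toy illustration of that pattern (the types are stand-ins, not Catalyst classes):

    object Halvable {
      def unapply(t: (Int, Int)): Option[Int] = {
        val (a, b) = t
        if ((a + b) % 2 == 0) Some((a + b) / 2) else None
      }
    }

    (4, 6) match {
      case Halvable(h) => println(s"matched and rewrote to $h") // prints 5
      case _           => println("fall back")
    }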
--- .../scala/org/apache/spark/sql/Dataset.scala | 49 +------------- .../sql/internal/EasilyFlattenable.scala | 67 +++++++++++++++++++ 2 files changed, 70 insertions(+), 46 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 232bc01e5d68..9e05231e82b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -58,7 +58,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation, FileTable} import org.apache.spark.sql.execution.python.EvaluatePython import org.apache.spark.sql.execution.stat.StatFunctions -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{EasilyFlattenable, SQLConf} import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -1575,7 +1575,7 @@ class Dataset[T] private[sql]( val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList) match { case EasilyFlattenable(flattendPlan) if !this.isStreaming && - logicalPlan.collectLeaves().forall(_.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty) => + !logicalPlan.exists(_.getTagValue(LogicalPlan.PLAN_ID_TAG).nonEmpty) => flattendPlan case _ => Project(newProjList, logicalPlan) @@ -4464,47 +4464,4 @@ class Dataset[T] private[sql]( } } -private[sql] object EasilyFlattenable { - def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { - val (logicalPlan, newProjList) = tuple - logicalPlan match { - case p @ Project(projList, child) if !child.isStreaming => - val currentOutputAttribs = AttributeSet(logicalPlan.output) - // In the new column list identify those Named Expressions which are just attributes and - // hence pass thru - val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition(ne => ne match { - case _: AttributeReference => true - case _ => false - }) - - if (passThruAttribs.size == currentOutputAttribs.size && passThruAttribs.forall( - currentOutputAttribs.contains) && tinkeredOrNewNamedExprs.nonEmpty) { - val attributesTinkeredInProject = AttributeSet(projList.filter(_ match { - case _: Alias => true - case _ => false - }).map(_.toAttribute)) - val attributesTinkeredInProjectAsName = attributesTinkeredInProject.map(_.name).toSet - if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists(attr => attr match { - case u: UnresolvedAttribute => attributesTinkeredInProjectAsName.contains(u.name) - case resAttr => attributesTinkeredInProject.contains(resAttr) - } ))) { - None - } else { - val remappedNewProjList = newProjList.map(ne => (ne transformUp { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - }).asInstanceOf[NamedExpression]) - val newProj = p.copy(projectList = remappedNewProjList) - p.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(newProj.setTagValue[Long]( - LogicalPlan.PLAN_ID_TAG, _)) - Option(newProj) - } - } else { - // for now None - None - } - - case _ => None - } - } -} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala new file mode 100644 index 000000000000..4c3c48ac4c47 --- 
/dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.internal + +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} + +private[sql] object EasilyFlattenable { + def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { + val (logicalPlan, newProjList) = tuple + logicalPlan match { + case p@Project(projList, child) => + val currentOutputAttribs = AttributeSet(logicalPlan.output) + // In the new column list identify those Named Expressions which are just attributes and + // hence pass thru + val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { + case _: AttributeReference => true + case _ => false + } + + if (passThruAttribs.size == currentOutputAttribs.size && passThruAttribs.forall( + currentOutputAttribs.contains) && tinkeredOrNewNamedExprs.nonEmpty) { + val attributesTinkeredInProject = AttributeSet(projList.filter(_ match { + case _: Alias => true + case _ => false + }).map(_.toAttribute)) + val attributesTinkeredInProjectAsName = attributesTinkeredInProject.map(_.name).toSet + if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists(attr => attr match { + case u: UnresolvedAttribute => attributesTinkeredInProjectAsName.contains(u.name) + case resAttr => attributesTinkeredInProject.contains(resAttr) + }))) { + None + } else { + val remappedNewProjList = newProjList.map(ne => (ne transformUp { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + }).asInstanceOf[NamedExpression]) + val newProj = p.copy(projectList = remappedNewProjList) + p.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(newProj.setTagValue[Long]( + LogicalPlan.PLAN_ID_TAG, _)) + Option(newProj) + } + } else { + // for now None + None + } + + case _ => None + } + } +} From a5de567f9326806c4f9ac080b76f19082168236a Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 20 Nov 2023 13:59:53 -0800 Subject: [PATCH 021/129] SPARK-45959. 
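The call site, as of the two preceding commits, already refuses any plan that carries a Connect plan id somewhere in the tree, so the tag copy inside the extractor is dead code and is dropped here. The call-site match keeps its conservative shape:

    case EasilyFlattenable(flattendPlan) if !this.isStreaming &&
        !logicalPlan.exists(_.getTagValue(LogicalPlan.PLAN_ID_TAG).nonEmpty) =>
      flattendPlan
    case _ => Project(newProjList, logicalPlan)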
--- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 1 - .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 9e05231e82b8..8319fb3f9367 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1580,7 +1580,6 @@ class Dataset[T] private[sql]( case _ => Project(newProjList, logicalPlan) } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 4c3c48ac4c47..1ea8dd8ade6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -51,10 +51,7 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) }).asInstanceOf[NamedExpression]) - val newProj = p.copy(projectList = remappedNewProjList) - p.getTagValue[Long](LogicalPlan.PLAN_ID_TAG).foreach(newProj.setTagValue[Long]( - LogicalPlan.PLAN_ID_TAG, _)) - Option(newProj) + Option(p.copy(projectList = remappedNewProjList)) } } else { // for now None From bfd28f8cd1cce9ff2a3d142db0f5f38b504e64ba Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 21 Nov 2023 14:46:47 -0800 Subject: [PATCH 022/129] SPARK-45959 --- .../sql/internal/EasilyFlattenable.scala | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 1ea8dd8ade6e..e5ad2c2256f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.internal import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} private[sql] object EasilyFlattenable { @@ -26,25 +26,26 @@ private[sql] object EasilyFlattenable { val (logicalPlan, newProjList) = tuple logicalPlan match { case p@Project(projList, child) => - val currentOutputAttribs = AttributeSet(logicalPlan.output) + // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { case _: AttributeReference => true case _ => false } + val currentOutputAttribs = AttributeSet(p.output) if (passThruAttribs.size == currentOutputAttribs.size && passThruAttribs.forall( currentOutputAttribs.contains) && tinkeredOrNewNamedExprs.nonEmpty) { - val attributesTinkeredInProject = AttributeSet(projList.filter(_ match { - case _: Alias => true - case _ => false - }).map(_.toAttribute)) - val attributesTinkeredInProjectAsName = attributesTinkeredInProject.map(_.name).toSet - if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists(attr => attr match { - case u: UnresolvedAttribute 
=> attributesTinkeredInProjectAsName.contains(u.name) - case resAttr => attributesTinkeredInProject.contains(resAttr) - }))) { + + val attribsReassignedInProj = AttributeSet(projList.filter(ne => ne match { + case _: AttributeReference => false + case _ => true + }).map(_.toAttribute)).intersect(AttributeSet(child.output)) + if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { + case attr: AttributeReference => attribsReassignedInProj.contains(attr) + case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) + })) { None } else { val remappedNewProjList = newProjList.map(ne => (ne transformUp { From e67761b0cf3d0cdbe8b9ae9a017e762d3546c1e3 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 22 Nov 2023 15:28:58 -0800 Subject: [PATCH 023/129] SPARK-45959 --- .../scala/org/apache/spark/sql/Dataset.scala | 2 +- .../spark/sql/execution/CacheManager.scala | 113 ++++++++++++------ .../command/AnalyzeColumnCommand.scala | 16 ++- .../sql/execution/command/CommandUtils.scala | 2 +- .../spark/sql/execution/command/tables.scala | 3 +- .../datasources/v2/DataSourceV2Strategy.scala | 4 +- .../sql/internal/EasilyFlattenable.scala | 20 +++- .../command/AlterTableRenameSuiteBase.scala | 2 +- 8 files changed, 113 insertions(+), 49 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index d369b8962776..8d775988a184 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3902,7 +3902,7 @@ class Dataset[T] private[sql]( */ def storageLevel: StorageLevel = { sparkSession.sharedState.cacheManager.lookupCachedData(this).map { cachedData => - cachedData.cachedRepresentation.cacheBuilder.storageLevel + cachedData.cachedRepresentation.toOption.get.cacheBuilder.storageLevel }.getOrElse(StorageLevel.NONE) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 089a43e64aca..9d2830085ea3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -18,14 +18,13 @@ package org.apache.spark.sql.execution import org.apache.hadoop.fs.{FileSystem, Path} - import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionSet, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, NamedExpression, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint, SubqueryAlias, UnaryNode, View} +import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, Project, ResolvedHint, SubqueryAlias, UnaryNode, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryRelation @@ -38,7 +37,8 @@ import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK import org.apache.spark.util.ArrayImplicits._ /** Holds a cached logical plan and its data */ -case class CachedData(plan: 
LogicalPlan, cachedRepresentation: InMemoryRelation) +case class CachedData(plan: LogicalPlan, + cachedRepresentation: Either[LogicalPlan, InMemoryRelation]) /** * Provides support in a SQLContext for caching query results and automatically using these cached @@ -71,7 +71,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { /** Clears all cached tables. */ def clearCache(): Unit = this.synchronized { - cachedData.foreach(_.cachedRepresentation.cacheBuilder.clearCache()) + cachedData.foreach(_.cachedRepresentation.toOption.get.cacheBuilder.clearCache()) cachedData = IndexedSeq[CachedData]() } @@ -114,7 +114,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { storageLevel: StorageLevel): Unit = { if (storageLevel == StorageLevel.NONE) { // Do nothing for StorageLevel.NONE since it will not actually cache any data. - } else if (lookupCachedData(planToCache).nonEmpty) { + } else if (lookupCachedData(planToCache).exists(_.cachedRepresentation.isRight)) { logWarning("Asked to cache already cached data.") } else { val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark) @@ -127,10 +127,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } this.synchronized { - if (lookupCachedData(planToCache).nonEmpty) { + if (lookupCachedData(planToCache).exists(_.cachedRepresentation.isRight)) { logWarning("Data has already been cached.") } else { - cachedData = CachedData(planToCache, inMemoryRelation) +: cachedData + cachedData = CachedData(planToCache, Right(inMemoryRelation)) +: cachedData } } } @@ -216,7 +216,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { this.synchronized { cachedData = cachedData.filterNot(cd => plansToUncache.exists(_ eq cd)) } - plansToUncache.foreach { _.cachedRepresentation.cacheBuilder.clearCache(blocking) } + plansToUncache.foreach { _.cachedRepresentation.toOption.get.cacheBuilder.clearCache(blocking) } // Re-compile dependent cached queries after removing the cached query. if (!cascade) { @@ -233,7 +233,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { // 2) The buffer has been cleared, but `isCachedColumnBuffersLoaded` returns true, then we // will keep it as it is. It means the physical plan has been re-compiled already in the // other thread. - val cacheAlreadyLoaded = cd.cachedRepresentation.cacheBuilder.isCachedColumnBuffersLoaded + val cacheAlreadyLoaded = cd.cachedRepresentation.toOption.get.cacheBuilder. 
+ isCachedColumnBuffersLoaded cd.plan.exists(isMatchedPlan) && !cacheAlreadyLoaded }) } @@ -246,8 +247,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { column: Seq[Attribute]): Unit = { val relation = cachedData.cachedRepresentation val (rowCount, newColStats) = - CommandUtils.computeColumnStats(sparkSession, relation, column) - relation.updateStats(rowCount, newColStats) + CommandUtils.computeColumnStats(sparkSession, relation.toOption.get, column) + relation.toOption.get.updateStats(rowCount, newColStats) } /** @@ -269,15 +270,15 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { cachedData = cachedData.filterNot(cd => needToRecache.exists(_ eq cd)) } needToRecache.foreach { cd => - cd.cachedRepresentation.cacheBuilder.clearCache() + cd.cachedRepresentation.toOption.get.cacheBuilder.clearCache() val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark) val newCache = sessionWithConfigsOff.withActive { val qe = sessionWithConfigsOff.sessionState.executePlan(cd.plan) - InMemoryRelation(cd.cachedRepresentation.cacheBuilder, qe) + InMemoryRelation(cd.cachedRepresentation.toOption.get.cacheBuilder, qe) } - val recomputedPlan = cd.copy(cachedRepresentation = newCache) + val recomputedPlan = cd.copy(cachedRepresentation = Right(newCache)) this.synchronized { - if (lookupCachedData(recomputedPlan.plan).nonEmpty) { + if (lookupCachedData(recomputedPlan.plan).exists(_.cachedRepresentation.isRight)) { logWarning("While recaching, data was already added to cache.") } else { cachedData = recomputedPlan +: cachedData @@ -293,24 +294,66 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { /** Optionally returns cached data for the given [[LogicalPlan]]. */ def lookupCachedData(plan: LogicalPlan): Option[CachedData] = { - cachedData.find(cd => if (plan.sameResult(cd.plan)) { - true - } else { - (plan, cd.plan) match { - case (incomingPlan: UnaryNode, cachedPlan: UnaryNode) => - if (incomingPlan.child.sameResult(cachedPlan.child)) { - if (incomingPlan.getClass == cachedPlan.getClass) { - val incomingExprs = incomingPlan.expressions - val cachedPlanExprs = ExpressionSet(cachedPlan.expressions) - incomingExprs.forall(ex => ex.references.isEmpty || cachedPlanExprs.contains(ex)) - } else { - false + val fullMatch = cachedData.find(cd => plan.sameResult(cd.plan)) + fullMatch.map(Option(_)).getOrElse({ + var foundMatch = false + var partialMatch: Option[CachedData] = None + for (cd <- cachedData if !foundMatch) { + (plan, cd.plan) match { + case (incomingPlan: UnaryNode, cachedPlan: UnaryNode) => + if (incomingPlan.child.sameResult(cachedPlan.child)) { + if (incomingPlan.getClass == cachedPlan.getClass && + incomingPlan.isInstanceOf[Project]) { + val incomingProject = incomingPlan.asInstanceOf[Project] + val cdPlanProject = cachedPlan.asInstanceOf[Project] + val canonicalizedInProj = incomingProject.canonicalized.asInstanceOf[Project] + val canonicalizedCdProj = cdPlanProject.canonicalized.asInstanceOf[Project] + // index if -1 indicates it is created out of existing output attribs + val (equivMapping, inComingProjNeedingMod) = canonicalizedInProj.projectList. 
+ zipWithIndex.map { + case (ne, index) => index -> canonicalizedCdProj.projectList.indexWhere(_ == ne) + }.partition(_._2 != -1) + + val cdAttribToInAttrib = equivMapping.map { + case (inAttribIndex, cdAttribIndex) => + cdPlanProject.projectList(cdAttribIndex).toAttribute -> + incomingProject.projectList(inAttribIndex).toAttribute + }.toMap + if (cdAttribToInAttrib.size == cachedPlan.output.size && + canonicalizedInProj.projectList.map(_.references).reduce(_ ++ _). + subsetOf(canonicalizedCdProj.outputSet)) { + val projectionToForceOnCdPlan = cachedPlan.output.map(cdAttribToInAttrib) + val modifiedInProj = incomingProject.projectList.zipWithIndex.map { + case (ne, indx) => if (equivMapping.exists(_._1 == indx)) { + ne.toAttribute + } else { + ne.transformUp { + case attr: Attribute => val indexInChildOutput = + incomingPlan.child.output.indexWhere(_.canonicalized == attr.canonicalized) + val attribInChildCdPlan = cachedPlan.child.output(indexInChildOutput) + val attribInCdPlan = cdPlanProject.projectList.find { + case attr: Attribute => + attr.canonicalized == attribInChildCdPlan.canonicalized + case al: Alias => + al.child.canonicalized == attribInChildCdPlan.canonicalized + }.get.toAttribute + cdAttribToInAttrib.find( + _._1.canonicalized == attribInCdPlan.canonicalized).map(_._2).get + }.asInstanceOf[NamedExpression] + } + } + val newPartialPlan = Project(modifiedInProj, cd.cachedRepresentation.toOption. + get.withOutput(projectionToForceOnCdPlan)) + partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) + foundMatch = true + } + } } - } else { - false - } - case _ => false + + case _ => + } } + partialMatch }) } @@ -319,11 +362,13 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val newPlan = plan transformDown { case command: IgnoreCachedData => command - case currentFragment => + case currentFragment if !currentFragment.isInstanceOf[InMemoryRelation] => lookupCachedData(currentFragment).map { cached => // After cache lookup, we should still keep the hints from the input plan. val hints = EliminateResolvedHint.extractHintsFromPlan(currentFragment)._2 - val cachedPlan = cached.cachedRepresentation.withOutput(currentFragment.output) + val cachedPlan = cached.cachedRepresentation.map(_.withOutput(currentFragment.output)). + merge + // The returned hint list is in top-down order, we should create the hint nodes from // right to left. 
hints.foldRight[LogicalPlan](cachedPlan) { case (hint, p) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 299f41eb55e1..0e3b69824abe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -62,12 +62,16 @@ case class AnalyzeColumnCommand( private def analyzeColumnInCachedData(plan: LogicalPlan, sparkSession: SparkSession): Boolean = { val cacheManager = sparkSession.sharedState.cacheManager val planToLookup = sparkSession.sessionState.executePlan(plan).analyzed - cacheManager.lookupCachedData(planToLookup).map { cachedData => - val columnsToAnalyze = getColumnsToAnalyze( - tableIdent, cachedData.cachedRepresentation, columnNames, allColumns) - cacheManager.analyzeColumnCacheQuery(sparkSession, cachedData, columnsToAnalyze) - cachedData - }.isDefined + cacheManager.lookupCachedData(planToLookup).exists { cachedData => + if (cachedData.cachedRepresentation.isRight) { + val columnsToAnalyze = getColumnsToAnalyze( + tableIdent, cachedData.cachedRepresentation.merge, columnNames, allColumns) + cacheManager.analyzeColumnCacheQuery(sparkSession, cachedData, columnsToAnalyze) + true + } else { + false + } + } } private def analyzeColumnInTempView(plan: LogicalPlan, sparkSession: SparkSession): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index 73478272a684..9571c0ff3904 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -238,7 +238,7 @@ object CommandUtils extends Logging { // Analyzes a catalog view if the view is cached val table = sparkSession.table(tableIdent.quotedString) val cacheManager = sparkSession.sharedState.cacheManager - if (cacheManager.lookupCachedData(table.logicalPlan).isDefined) { + if (cacheManager.lookupCachedData(table.logicalPlan).exists(_.cachedRepresentation.isRight)) { if (!noScan) { // To collect table stats, materializes an underlying columnar RDD table.count() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 7ed82b16cc5e..95661685db28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -201,7 +201,8 @@ case class AlterTableRenameCommand( // If `optStorageLevel` is defined, the old table was cached. val optCachedData = sparkSession.sharedState.cacheManager.lookupCachedData( sparkSession.table(oldName.unquotedString)) - val optStorageLevel = optCachedData.map(_.cachedRepresentation.cacheBuilder.storageLevel) + val optStorageLevel = optCachedData.map(_.cachedRepresentation.toOption.get.cacheBuilder. 
+ storageLevel) if (optStorageLevel.isDefined) { CommandUtils.uncacheTableOrView(sparkSession, oldName.unquotedString) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 3f0dab11cda2..706a6fa06a3c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -85,8 +85,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) val cache = session.sharedState.cacheManager.lookupCachedData(v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) - if (cache.isDefined) { - val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel + if (cache.exists(_.cachedRepresentation.isRight)) { + val cacheLevel = cache.get.cachedRepresentation.toOption.get.cacheBuilder.storageLevel Some(cacheLevel) } else { None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index e5ad2c2256f3..e59703dd680c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.internal import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} private[sql] object EasilyFlattenable { @@ -48,10 +48,24 @@ private[sql] object EasilyFlattenable { })) { None } else { - val remappedNewProjList = newProjList.map(ne => (ne transformUp { + val remappedNewProjList = newProjList.map(ne => ne match { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - }).asInstanceOf[NamedExpression]) + case anyOtherExpr => (anyOtherExpr transformUp { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).map(x => x match { + case al: Alias => al.child + case _ => x + }).getOrElse(attr) + + case u: UnresolvedAttribute => projList.find( + _.toAttribute.name == u.name).map(x => x match { + case al: Alias => al.child + case _ => x + }).getOrElse(u) + + }).asInstanceOf[NamedExpression] + }) Option(p.copy(projectList = remappedNewProjList)) } } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala index 506b44741ab4..0d7c336f59fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala @@ -73,7 +73,7 @@ trait AlterTableRenameSuiteBase extends QueryTest with DDLCommandTestUtils { def getStorageLevel(tableName: String): StorageLevel = { val table = spark.table(tableName) val cachedData = 
spark.sharedState.cacheManager.lookupCachedData(table).get - cachedData.cachedRepresentation.cacheBuilder.storageLevel + cachedData.cachedRepresentation.toOption.get.cacheBuilder.storageLevel } sql(s"CREATE TABLE $src (c0 INT) $defaultUsing") sql(s"INSERT INTO $src SELECT 0") From 00df8133750845ae88971c51ab1d02316fa42fe1 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 22 Nov 2023 15:31:36 -0800 Subject: [PATCH 024/129] SPARK-45959 --- .../main/scala/org/apache/spark/sql/execution/CacheManager.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 9d2830085ea3..7897ec1989a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution import org.apache.hadoop.fs.{FileSystem, Path} + import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} From 784ce4922b8436560f440b8f652942ea882d21a3 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 22 Nov 2023 15:46:10 -0800 Subject: [PATCH 025/129] SPARK-45959 --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 8d775988a184..33b35c0dc7e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1574,8 +1574,7 @@ class Dataset[T] private[sql]( } val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList) match { - case EasilyFlattenable(flattendPlan) if !this.isStreaming && - !logicalPlan.exists(_.getTagValue(LogicalPlan.PLAN_ID_TAG).nonEmpty) => + case EasilyFlattenable(flattendPlan) if !this.isStreaming => flattendPlan case _ => Project(newProjList, logicalPlan) From b0e6c895845e11b36f6d431319c846dd969f7e0b Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 22 Nov 2023 21:46:26 -0800 Subject: [PATCH 026/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 4 ++-- .../spark/sql/execution/python/ExtractPythonUDFsSuite.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index e59703dd680c..5b0e603c6896 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.internal -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} @@ -45,7 +45,7 @@ private[sql] object EasilyFlattenable { if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { case attr: AttributeReference => attribsReassignedInProj.contains(attr) case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) - })) { + } || 
ne.collectFirst{case u: UnresolvedFunction => u}.nonEmpty)) { None } else { val remappedNewProjList = newProjList.map(ne => ne match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala index 0ab8691801d7..cce9e0b5cc15 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala @@ -186,7 +186,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { .withColumn("d", col("c")) val pythonEvalNodes4 = collectBatchExec(df4.queryExecution.executedPlan) assert(pythonEvalNodes4.size == 1) - assert(pythonEvalNodes4.head.udfs.size == 1) + assert(pythonEvalNodes4.head.udfs.size == 2) val df5 = df.withColumns(Seq("c", "d"), Seq(batchedNondeterministicPythonUDF(col("a")), batchedNondeterministicPythonUDF(col("a")))) From e1899c04d9b1d75180da31ab07414209b0bbadaa Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 23 Nov 2023 00:21:39 -0800 Subject: [PATCH 027/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 5b0e603c6896..a7e85714eb92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -45,7 +45,10 @@ private[sql] object EasilyFlattenable { if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { case attr: AttributeReference => attribsReassignedInProj.contains(attr) case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) - } || ne.collectFirst{case u: UnresolvedFunction => u}.nonEmpty)) { + } || ne.collectFirst{ + case u: UnresolvedFunction => u + case ex if !ex.deterministic => ex + }.nonEmpty)) { None } else { val remappedNewProjList = newProjList.map(ne => ne match { From edb34430247ace8394ef4c539e4f7c985543fcde Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 23 Nov 2023 12:23:58 -0800 Subject: [PATCH 028/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index a7e85714eb92..f47f4ee25f6c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.internal import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} private[sql] object EasilyFlattenable { @@ -48,6 +48,7 @@ private[sql] object EasilyFlattenable { } || ne.collectFirst{ case u: UnresolvedFunction => u case ex if !ex.deterministic => ex + case ex if 
ex.isInstanceOf[UserDefinedExpression] => ex }.nonEmpty)) { None } else { From 7716ee9b93285639d8255cedc072eca7c72cb755 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 23 Nov 2023 17:08:48 -0800 Subject: [PATCH 029/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index f47f4ee25f6c..6a934636e68b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -44,9 +44,10 @@ private[sql] object EasilyFlattenable { }).map(_.toAttribute)).intersect(AttributeSet(child.output)) if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { case attr: AttributeReference => attribsReassignedInProj.contains(attr) - case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) + // case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) } || ne.collectFirst{ case u: UnresolvedFunction => u + case u: UnresolvedAttribute => u case ex if !ex.deterministic => ex case ex if ex.isInstanceOf[UserDefinedExpression] => ex }.nonEmpty)) { @@ -61,13 +62,13 @@ private[sql] object EasilyFlattenable { case al: Alias => al.child case _ => x }).getOrElse(attr) - + /* case u: UnresolvedAttribute => projList.find( _.toAttribute.name == u.name).map(x => x match { case al: Alias => al.child case _ => x }).getOrElse(u) - + */ }).asInstanceOf[NamedExpression] }) Option(p.copy(projectList = remappedNewProjList)) From 10a76ba699828ee6e6bb5722426d48a2436e17ff Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 23 Nov 2023 21:33:25 -0800 Subject: [PATCH 030/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 6a934636e68b..65862e0a1c4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -44,12 +44,12 @@ private[sql] object EasilyFlattenable { }).map(_.toAttribute)).intersect(AttributeSet(child.output)) if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { case attr: AttributeReference => attribsReassignedInProj.contains(attr) - // case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) + case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) } || ne.collectFirst{ case u: UnresolvedFunction => u - case u: UnresolvedAttribute => u case ex if !ex.deterministic => ex case ex if ex.isInstanceOf[UserDefinedExpression] => ex + case u: UnresolvedAttribute => u }.nonEmpty)) { None } else { @@ -62,13 +62,13 @@ private[sql] object EasilyFlattenable { case al: Alias => al.child case _ => x }).getOrElse(attr) - /* + case u: UnresolvedAttribute => projList.find( _.toAttribute.name == u.name).map(x => x match { case al: Alias => al.child case _ => x }).getOrElse(u) - */ + }).asInstanceOf[NamedExpression] }) Option(p.copy(projectList = remappedNewProjList)) From c95b35b6589919dfe462c50592639e83aa566171 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 24 Nov 2023 01:20:50 -0800 Subject: 
[PATCH 031/129] SPARK-45959 --- .../spark/sql/execution/python/ExtractPythonUDFsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala index cce9e0b5cc15..0ab8691801d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala @@ -186,7 +186,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { .withColumn("d", col("c")) val pythonEvalNodes4 = collectBatchExec(df4.queryExecution.executedPlan) assert(pythonEvalNodes4.size == 1) - assert(pythonEvalNodes4.head.udfs.size == 2) + assert(pythonEvalNodes4.head.udfs.size == 1) val df5 = df.withColumns(Seq("c", "d"), Seq(batchedNondeterministicPythonUDF(col("a")), batchedNondeterministicPythonUDF(col("a")))) From e8b74f0154418a0e37093a0b0ee336c156544494 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 24 Nov 2023 12:09:01 -0800 Subject: [PATCH 032/129] SPARK-45959 --- .../scala/org/apache/spark/sql/Dataset.scala | 3 +- .../sql/internal/EasilyFlattenable.scala | 29 ++++++++----------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index fe40179df7bb..69878f691bdc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1574,8 +1574,7 @@ class Dataset[T] private[sql]( } val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList) match { - case EasilyFlattenable(flattendPlan) if !this.isStreaming => - flattendPlan + case EasilyFlattenable(flattendPlan) if !this.isStreaming => flattendPlan case _ => Project(newProjList, logicalPlan) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 65862e0a1c4b..6d89aa40a1a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -42,9 +42,10 @@ private[sql] object EasilyFlattenable { case _: AttributeReference => false case _ => true }).map(_.toAttribute)).intersect(AttributeSet(child.output)) + if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { case attr: AttributeReference => attribsReassignedInProj.contains(attr) - case u: UnresolvedAttribute => attribsReassignedInProj.exists(_.name == u.name) + case _: UnresolvedAttribute => true } || ne.collectFirst{ case u: UnresolvedFunction => u case ex if !ex.deterministic => ex @@ -53,24 +54,18 @@ private[sql] object EasilyFlattenable { }.nonEmpty)) { None } else { - val remappedNewProjList = newProjList.map(ne => ne match { + val remappedNewProjList = newProjList.map { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - case anyOtherExpr => (anyOtherExpr transformUp { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map(x => x match { - case al: Alias => al.child - case _ => x - }).getOrElse(attr) - - case u: UnresolvedAttribute => projList.find( - _.toAttribute.name == u.name).map(x => x match { - 
case al: Alias => al.child - case _ => x - }).getOrElse(u) - - }).asInstanceOf[NamedExpression] - }) + case anyOtherExpr => + (anyOtherExpr transformUp { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).map { + case al: Alias => al.child + case x => x + }.getOrElse(attr) + }).asInstanceOf[NamedExpression] + } Option(p.copy(projectList = remappedNewProjList)) } } else { From 056129311ddc981121c9daab612dc09a6781f17b Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 25 Nov 2023 12:44:48 -0800 Subject: [PATCH 033/129] SPARK-45959 --- .../sql/internal/EasilyFlattenable.scala | 19 ++++++++++++++----- .../org/apache/spark/sql/DataFrameSuite.scala | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 6d89aa40a1a6..a65074c880cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.internal -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} @@ -44,13 +44,16 @@ private[sql] object EasilyFlattenable { }).map(_.toAttribute)).intersect(AttributeSet(child.output)) if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { - case attr: AttributeReference => attribsReassignedInProj.contains(attr) - case _: UnresolvedAttribute => true + case attr: AttributeReference => attribsReassignedInProj.contains(attr) + case u: UnresolvedAttribute => if (u.nameParts.size > 1) { + true + } else { + attribsReassignedInProj.exists(attr => attr.name == u.name) + } } || ne.collectFirst{ - case u: UnresolvedFunction => u case ex if !ex.deterministic => ex case ex if ex.isInstanceOf[UserDefinedExpression] => ex - case u: UnresolvedAttribute => u + case u: UnresolvedAttribute if u.nameParts.size != 1 => u }.nonEmpty)) { None } else { @@ -64,6 +67,12 @@ private[sql] object EasilyFlattenable { case al: Alias => al.child case x => x }.getOrElse(attr) + + case u: UnresolvedAttribute => projList.find( + _.toAttribute.name == u.name).map(x => x match { + case al: Alias => al.child + case _ => x + }).getOrElse(u) }).asInstanceOf[NamedExpression] } Option(p.copy(projectList = remappedNewProjList)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 59759e34cab3..404d815815d9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3703,6 +3703,21 @@ class DataFrameSuite extends QueryTest parameters = Map("viewName" -> "AUTHORIZATION")) } } + + test("withColumns: check no new project addition") { + val testDf =spark.range(1).select($"id" as "a", $"id" as "b") + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumns(Seq("newCol1", "newCol2"), + Seq(col("a") + 1, col("b") + 2)) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + 
assert(initNodes.size === newNodes.size) + } } case class GroupByKey(a: Int, b: Int) From 38618737d51a36573d5518d657a9d80e13c00f5d Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 25 Nov 2023 14:02:51 -0800 Subject: [PATCH 034/129] SPARK-45959 --- .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 404d815815d9..7e72e78be351 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3705,7 +3705,7 @@ class DataFrameSuite extends QueryTest } test("withColumns: check no new project addition") { - val testDf =spark.range(1).select($"id" as "a", $"id" as "b") + val testDf = spark.range(1).select($"id" as "a", $"id" as "b") val initNodes = testDf.queryExecution.logical.collect { case l => l } From 02b5719e1e1810237c6551a8714a18c6083c3571 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 25 Nov 2023 14:53:29 -0800 Subject: [PATCH 035/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index a65074c880cd..d4ef41f9faab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.internal -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute} import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} @@ -54,6 +54,7 @@ private[sql] object EasilyFlattenable { case ex if !ex.deterministic => ex case ex if ex.isInstanceOf[UserDefinedExpression] => ex case u: UnresolvedAttribute if u.nameParts.size != 1 => u + case u: UnresolvedAlias => u }.nonEmpty)) { None } else { From e613eacaddf42debc99ef36774b3b52a95cdc754 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 25 Nov 2023 15:13:20 -0800 Subject: [PATCH 036/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 3 ++- .../spark/sql/execution/python/ExtractPythonUDFsSuite.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index d4ef41f9faab..1b0c22e7e47a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.internal -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} @@ -55,6 +55,7 @@ private[sql] 
object EasilyFlattenable { case ex if ex.isInstanceOf[UserDefinedExpression] => ex case u: UnresolvedAttribute if u.nameParts.size != 1 => u case u: UnresolvedAlias => u + case u : UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { None } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala index 0ab8691801d7..cce9e0b5cc15 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala @@ -186,7 +186,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { .withColumn("d", col("c")) val pythonEvalNodes4 = collectBatchExec(df4.queryExecution.executedPlan) assert(pythonEvalNodes4.size == 1) - assert(pythonEvalNodes4.head.udfs.size == 1) + assert(pythonEvalNodes4.head.udfs.size == 2) val df5 = df.withColumns(Seq("c", "d"), Seq(batchedNondeterministicPythonUDF(col("a")), batchedNondeterministicPythonUDF(col("a")))) From f3eb88b425e414bbc6a12ba7c55754f85c730679 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 26 Nov 2023 02:23:01 -0800 Subject: [PATCH 037/129] SPARK-45959 --- python/pyspark/pandas/tests/computation/test_any_all.py | 2 +- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyspark/pandas/tests/computation/test_any_all.py b/python/pyspark/pandas/tests/computation/test_any_all.py index 6c120aead4e6..0986c0d89152 100644 --- a/python/pyspark/pandas/tests/computation/test_any_all.py +++ b/python/pyspark/pandas/tests/computation/test_any_all.py @@ -1,4 +1,4 @@ -# + # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 1b0c22e7e47a..32877f25dba0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -48,7 +48,7 @@ private[sql] object EasilyFlattenable { case u: UnresolvedAttribute => if (u.nameParts.size > 1) { true } else { - attribsReassignedInProj.exists(attr => attr.name == u.name) + attribsReassignedInProj.exists(attr => attr.name.equalsIgnoreCase(u.name)) } } || ne.collectFirst{ case ex if !ex.deterministic => ex @@ -71,7 +71,7 @@ private[sql] object EasilyFlattenable { }.getOrElse(attr) case u: UnresolvedAttribute => projList.find( - _.toAttribute.name == u.name).map(x => x match { + _.toAttribute.name.equalsIgnoreCase(u.name)).map(x => x match { case al: Alias => al.child case _ => x }).getOrElse(u) From 462b08d1999e818bc0137200e241e6f1e5c393c1 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 26 Nov 2023 11:28:46 -0800 Subject: [PATCH 038/129] SPARK-45959 --- .../sql/internal/EasilyFlattenable.scala | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 32877f25dba0..373e1fa8456f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -17,10 +17,13 @@ package org.apache.spark.sql.internal +import scala.util.{Failure, Success, Try} + import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} + private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple @@ -59,25 +62,31 @@ private[sql] object EasilyFlattenable { }.nonEmpty)) { None } else { - val remappedNewProjList = newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - case anyOtherExpr => - (anyOtherExpr transformUp { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - case al: Alias => al.child - case x => x - }.getOrElse(attr) + val remappedNewProjListResult = Try { + newProjList.map { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + case anyOtherExpr => + (anyOtherExpr transformUp { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).map { + case al: Alias => al.child + case x => x + }.getOrElse(attr) - case u: UnresolvedAttribute => projList.find( - _.toAttribute.name.equalsIgnoreCase(u.name)).map(x => x match { - case al: Alias => al.child - case _ => x - }).getOrElse(u) - }).asInstanceOf[NamedExpression] + case u: UnresolvedAttribute => projList.find( + _.toAttribute.name.equalsIgnoreCase(u.name)).map(x => x match { + case al: Alias => al.child + case _ => x + }).getOrElse(throw new UnsupportedOperationException("Not able to 
flatten" + + s" unresolved attribute $u")) + }).asInstanceOf[NamedExpression] + } + } + remappedNewProjListResult match { + case Success(remappedNewProjList) => Option(p.copy(projectList = remappedNewProjList)) + case Failure(_) => None } - Option(p.copy(projectList = remappedNewProjList)) } } else { // for now None From 8f740784e8bf78ccc0beecd474fe5996a2ad4111 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 26 Nov 2023 13:29:43 -0800 Subject: [PATCH 039/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 373e1fa8456f..53a03123adf6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -77,6 +77,9 @@ private[sql] object EasilyFlattenable { case u: UnresolvedAttribute => projList.find( _.toAttribute.name.equalsIgnoreCase(u.name)).map(x => x match { case al: Alias => al.child + case u: UnresolvedAttribute => + throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $u") case _ => x }).getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $u")) From df4e6b479ba247487d74dd8af25d9e4053bf9b9f Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 26 Nov 2023 20:25:58 -0800 Subject: [PATCH 040/129] SPARK-45959 --- .../spark/sql/connect/planner/SparkConnectPlanner.scala | 3 ++- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 95c5acc803d4..38bee2296a88 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -197,7 +197,8 @@ class SparkConnectPlanner( case _ => throw InvalidPlanInput(s"${rel.getUnknown} not supported.") } - if (rel.hasCommon && rel.getCommon.hasPlanId) { + if (rel.hasCommon && rel.getCommon.hasPlanId && + plan.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty) { plan.setTagValue(LogicalPlan.PLAN_ID_TAG, rel.getCommon.getPlanId) } plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 53a03123adf6..1e8e975bd975 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -87,7 +87,11 @@ private[sql] object EasilyFlattenable { } } remappedNewProjListResult match { - case Success(remappedNewProjList) => Option(p.copy(projectList = remappedNewProjList)) + case Success(remappedNewProjList) => val pidOpt = p.getTagValue( + LogicalPlan.PLAN_ID_TAG) + val newProj = p.copy(projectList = remappedNewProjList) + pidOpt.foreach(id => newProj.setTagValue[Long](LogicalPlan.PLAN_ID_TAG, id)) + Option(newProj) case Failure(_) => None } } From 3701a8bba9c1b806b58b0e2719d274929c3fe5c4 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 26 Nov 2023 22:31:08 -0800 Subject: 
[PATCH 041/129] SPARK-45959 --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 1e8e975bd975..23f644c970e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -56,7 +56,8 @@ private[sql] object EasilyFlattenable { } || ne.collectFirst{ case ex if !ex.deterministic => ex case ex if ex.isInstanceOf[UserDefinedExpression] => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 => u + case u: UnresolvedAttribute if u.nameParts.size != 1 || + u.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => u case u: UnresolvedAlias => u case u : UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { From d19844716811a8b642be2c60f23fa3a013f7c501 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 26 Nov 2023 22:59:57 -0800 Subject: [PATCH 042/129] SPARK-45959 --- python/pyspark/pandas/tests/computation/test_any_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/pandas/tests/computation/test_any_all.py b/python/pyspark/pandas/tests/computation/test_any_all.py index 0986c0d89152..6c120aead4e6 100644 --- a/python/pyspark/pandas/tests/computation/test_any_all.py +++ b/python/pyspark/pandas/tests/computation/test_any_all.py @@ -1,4 +1,4 @@ - # +# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. 
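At this point in the series the core mechanism is in place, so a brief orientation may help: the EasilyFlattenable extractor collapses a withColumn/withColumns projection into the Project node beneath it instead of stacking a new one. A minimal Scala sketch of the intended effect, assuming only an active SparkSession named `spark` (the DataFrame and column names are illustrative, not taken from the patches):

    import org.apache.spark.sql.functions.col

    // A minimal sketch, assuming `spark` is an active SparkSession.
    // Without flattening, each withColumn stacks a fresh Project on the
    // logical plan; with the EasilyFlattenable rewrite the added column is
    // merged into the existing projection, keeping plan depth constant.
    val base = spark.range(1).select(col("id").as("a"), col("id").as("b"))
    val added = base.withColumn("newCol", col("a") + 1)
    // Expected shape after flattening: a single
    //   Project [a, b, (a + 1) AS newCol]
    // directly over the Range relation, not Project(Project(Range)).
    println(added.queryExecution.logical.numberedTreeString)

The AddColumnsFlattenSuite added later in the series asserts this by counting logical plan nodes before and after withColumns.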
From 249075d141fb689adca168b8cb1a6742a1dbbe68 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sun, 26 Nov 2023 23:39:35 -0800 Subject: [PATCH 043/129] SPARK-45959 --- .../spark/sql/connect/planner/SparkConnectPlanner.scala | 3 +-- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 7 ++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 38bee2296a88..95c5acc803d4 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -197,8 +197,7 @@ class SparkConnectPlanner( case _ => throw InvalidPlanInput(s"${rel.getUnknown} not supported.") } - if (rel.hasCommon && rel.getCommon.hasPlanId && - plan.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty) { + if (rel.hasCommon && rel.getCommon.hasPlanId) { plan.setTagValue(LogicalPlan.PLAN_ID_TAG, rel.getCommon.getPlanId) } plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 23f644c970e2..10e7b7c7040e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -56,8 +56,7 @@ private[sql] object EasilyFlattenable { } || ne.collectFirst{ case ex if !ex.deterministic => ex case ex if ex.isInstanceOf[UserDefinedExpression] => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 || - u.getTagValue(LogicalPlan.PLAN_ID_TAG).isDefined => u + case u: UnresolvedAttribute if u.nameParts.size != 1 => u case u: UnresolvedAlias => u case u : UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -88,10 +87,8 @@ private[sql] object EasilyFlattenable { } } remappedNewProjListResult match { - case Success(remappedNewProjList) => val pidOpt = p.getTagValue( - LogicalPlan.PLAN_ID_TAG) + case Success(remappedNewProjList) => val newProj = p.copy(projectList = remappedNewProjList) - pidOpt.foreach(id => newProj.setTagValue[Long](LogicalPlan.PLAN_ID_TAG, id)) Option(newProj) case Failure(_) => None } From 77ab011fb70c599699fb63ebe47d6f5f97154c91 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 27 Nov 2023 01:27:09 -0800 Subject: [PATCH 044/129] SPARK-45959 --- .../sql/connect/planner/SparkConnectPlanner.scala | 11 ++++++----- .../sql/catalyst/plans/logical/LogicalPlan.scala | 1 + .../src/main/scala/org/apache/spark/sql/Dataset.scala | 3 ++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 95c5acc803d4..75aa00d080bf 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -958,9 +958,9 @@ class SparkConnectPlanner( } private def transformWithColumnsRenamed(rel: proto.WithColumnsRenamed): LogicalPlan = { - Dataset - .ofRows(session, transformRelation(rel.getInput)) - 
.withColumnsRenamed(rel.getRenameColumnsMapMap) + val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) + ds.logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_FLATTENING, true) + ds.withColumnsRenamed(rel.getRenameColumnsMapMap) .logicalPlan } @@ -981,9 +981,10 @@ class SparkConnectPlanner( (alias.getName(0), Column(transformExpression(alias.getExpr)), metadata) }.unzip3 - Dataset + val ds = Dataset .ofRows(session, transformRelation(rel.getInput)) - .withColumns(colNames, cols, metadata) + ds.logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_FLATTENING, true) + ds.withColumns(colNames, cols, metadata) .logicalPlan } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index cce385e8d9d1..9726a031e24b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -198,6 +198,7 @@ object LogicalPlan { // to the old code path. private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id") private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col") + private[spark] val SKIP_FLATTENING = TreeNodeTag[Boolean]("skipFlattening") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 69878f691bdc..6d1d963d0e62 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1574,7 +1574,8 @@ class Dataset[T] private[sql]( } val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList) match { - case EasilyFlattenable(flattendPlan) if !this.isStreaming => flattendPlan + case EasilyFlattenable(flattendPlan) if !this.isStreaming && + !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan case _ => Project(newProjList, logicalPlan) } From c613a187e05a84d9302dc24402f5e23bb08082e7 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 27 Nov 2023 09:37:57 -0800 Subject: [PATCH 045/129] SPARK-45959 --- .../sql/connect/planner/SparkConnectPlanner.scala | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 75aa00d080bf..82255b1afaba 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -959,9 +959,8 @@ class SparkConnectPlanner( private def transformWithColumnsRenamed(rel: proto.WithColumnsRenamed): LogicalPlan = { val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_FLATTENING, true) - ds.withColumnsRenamed(rel.getRenameColumnsMapMap) - .logicalPlan + ds.logicalPlan.setTagValue(LogicalPlan.SKIP_FLATTENING, true) + ds.withColumnsRenamed(rel.getRenameColumnsMapMap).logicalPlan } private def transformWithColumns(rel: proto.WithColumns): LogicalPlan = { @@ -981,11 +980,9 @@ class SparkConnectPlanner( (alias.getName(0), Column(transformExpression(alias.getExpr)), metadata) }.unzip3 - val ds = Dataset - .ofRows(session, 
transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_FLATTENING, true) - ds.withColumns(colNames, cols, metadata) - .logicalPlan + val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) + ds.logicalPlan.setTagValue(LogicalPlan.SKIP_FLATTENING, true) + ds.withColumns(colNames, cols, metadata).logicalPlan } private def transformWithWatermark(rel: proto.WithWatermark): LogicalPlan = { From 8e80db22442992e219b01bc27b919a54b90a37d9 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 27 Nov 2023 15:51:29 -0800 Subject: [PATCH 046/129] SPARK-45959 --- .../sql/internal/EasilyFlattenable.scala | 12 ++- .../spark/sql/AddColumnsFlattenSuite.scala | 75 +++++++++++++++++++ .../org/apache/spark/sql/DataFrameSuite.scala | 15 ---- 3 files changed, 83 insertions(+), 19 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 10e7b7c7040e..6da8aa2838e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -40,18 +40,22 @@ private[sql] object EasilyFlattenable { if (passThruAttribs.size == currentOutputAttribs.size && passThruAttribs.forall( currentOutputAttribs.contains) && tinkeredOrNewNamedExprs.nonEmpty) { + val x = AttributeSet(projList.filter(ne => ne match { + case _: AttributeReference => false + case _ => true + }).map(_.toAttribute)) - val attribsReassignedInProj = AttributeSet(projList.filter(ne => ne match { + val attribsReassignedInProj = projList.filter(ne => ne match { case _: AttributeReference => false case _ => true - }).map(_.toAttribute)).intersect(AttributeSet(child.output)) + }).map(_.name).toSet.intersect(child.output.map(_.name).toSet) if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { - case attr: AttributeReference => attribsReassignedInProj.contains(attr) + case attr: AttributeReference => attribsReassignedInProj.contains(attr.name) case u: UnresolvedAttribute => if (u.nameParts.size > 1) { true } else { - attribsReassignedInProj.exists(attr => attr.name.equalsIgnoreCase(u.name)) + attribsReassignedInProj.contains(u.name) } } || ne.collectFirst{ case ex if !ex.deterministic => ex diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala new file mode 100644 index 000000000000..546c4bde7122 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession + + +class AddColumnsFlattenSuite extends QueryTest + with SharedSparkSession with AdaptiveSparkPlanHelper { + import testImplicits._ + + test("withColumns: check no new project addition for simple columns addition") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b") + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumns(Seq("newCol1", "newCol2"), + Seq(col("a") + 1, col("b") + 2)) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(initNodes.size === newNodes.size) + } + + test("withColumns: check no new project addition if redefined alias is not used in " + + "new columns") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + $"b") + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumns(Seq("newCol1"), Seq(col("b") + 2)) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(initNodes.size === newNodes.size) + } + + test("withColumns: new project addition if redefined alias is used in new columns") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + $"b") + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumns(Seq("newCol1"), Seq(col("a") + 2)) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size + 1) + } +} + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 7e72e78be351..59759e34cab3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3703,21 +3703,6 @@ class DataFrameSuite extends QueryTest parameters = Map("viewName" -> "AUTHORIZATION")) } } - - test("withColumns: check no new project addition") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b") - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumns(Seq("newCol1", "newCol2"), - Seq(col("a") + 1, col("b") + 2)) - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(initNodes.size === newNodes.size) - } } case class GroupByKey(a: Int, b: Int) From bb4f914672e4f625b7453bce68e9d3d494c2e5e6 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 27 Nov 2023 20:27:30 -0800 Subject: [PATCH 047/129] SPARK-45959 --- .../sql/internal/EasilyFlattenable.scala | 45 +++++++++++++------ .../spark/sql/AddColumnsFlattenSuite.scala | 14 ++++++ 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 6da8aa2838e4..c4d0d60dad79 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -37,14 +37,11 @@ private[sql] object EasilyFlattenable { case _ => false } val currentOutputAttribs
= AttributeSet(p.output) - - if (passThruAttribs.size == currentOutputAttribs.size && passThruAttribs.forall( - currentOutputAttribs.contains) && tinkeredOrNewNamedExprs.nonEmpty) { - val x = AttributeSet(projList.filter(ne => ne match { - case _: AttributeReference => false - case _ => true - }).map(_.toAttribute)) - + val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall( + currentOutputAttribs.contains) + if (passThruAttribs.size == currentOutputAttribs.size && + passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.nonEmpty) { + // case of new columns being added only val attribsReassignedInProj = projList.filter(ne => ne match { case _: AttributeReference => false case _ => true }).map(_.name).toSet.intersect(child.output.map(_.name).toSet) @@ -97,8 +94,30 @@ private[sql] object EasilyFlattenable { case Failure(_) => None } } + } else if (passThruAttribs.size + tinkeredOrNewNamedExprs.size == currentOutputAttribs.size + && passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.forall( + ne => ne match { + case Alias(_: AttributeReference, _) => true + case _ => false + })) { + // case of renaming of columns + val remappedNewProjListResult = newProjList.map { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + + case al @ Alias(ar: AttributeReference, name) => + projList.find( + _.toAttribute.canonicalized == ar.canonicalized).map { + case al: Alias => al.copy(name = name)(exprId = al.exprId, + qualifier = al.qualifier, explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys) + + case _: AttributeReference => al + }.get + } + + Option(p.copy(projectList = remappedNewProjListResult)) } else { - // for now None None } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index 546c4bde7122..fd28a4e522ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -71,5 +71,19 @@ class AddColumnsFlattenSuite extends QueryTest } assert(newNodes.size === initNodes.size + 1) } + + test("withColumns: remap of column should not result in project addition") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b") + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumnRenamed("a", "c") + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size) + } } From d06129f3030395afeb349f856d80d576ecf97b36 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 28 Nov 2023 01:02:38 -0800 Subject: [PATCH 048/129] SPARK-45959 --- .../spark/sql/internal/EasilyFlattenable.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3
deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index c4d0d60dad79..527b3b9fb8aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -97,18 +97,24 @@ private[sql] object EasilyFlattenable { } else if (passThruAttribs.size + tinkeredOrNewNamedExprs.size == currentOutputAttribs.size && passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.forall( ne => ne match { - case Alias(_: AttributeReference, _) => true + case Alias(x: AttributeReference, _) => + val toCheck = projList.find(_.toAttribute.canonicalized == x.canonicalized).get + toCheck match { + case _: AttributeReference => true + case al: Alias => passThruAttribs.forall( + _.toAttribute.canonicalized != al.toAttribute.canonicalized ) + } case _ => false })) { // case of renaming of columns val remappedNewProjListResult = newProjList.map { case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + _.toAttribute.canonicalized == attr.canonicalized).get case al @ Alias(ar: AttributeReference, name) => projList.find( _.toAttribute.canonicalized == ar.canonicalized).map { - case al: Alias => al.copy(name = name)(exprId = al.exprId, + case alx: Alias => alx.copy(name = name)(exprId = al.exprId, qualifier = al.qualifier, explicitMetadata = al.explicitMetadata, nonInheritableMetadataKeys = al.nonInheritableMetadataKeys) From 80761e8ac3d10688450a7a0ea5ace3e2204e06ce Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 28 Nov 2023 07:57:47 -0800 Subject: [PATCH 049/129] SPARK-45959 --- .../resources/query-tests/explain-results/crosstab.explain | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain index a30cd136e8db..0487d7360201 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain @@ -1,5 +1,4 @@ Project [a_b#0] -+- Project [a_b#0] - +- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] - +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] - +- LocalRelation , [id#0L, a#0, b#0] ++- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] + +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] + +- LocalRelation , [id#0L, a#0, b#0] From 00e38c4337c33d0f17f4a51f3a4c66eed73e4ce9 Mon Sep 17 00:00:00 
2001 From: ashahid Date: Tue, 28 Nov 2023 14:27:24 -0800 Subject: [PATCH 050/129] SPARK-45959 --- .../scala/org/apache/spark/sql/Dataset.scala | 9 +- .../sql/internal/EasilyFlattenable.scala | 184 ++++++++++-------- .../spark/sql/AddColumnsFlattenSuite.scala | 70 ++++++- 3 files changed, 178 insertions(+), 85 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 3c6533dbfd97..d2ee56fa853a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2942,7 +2942,14 @@ class Dataset[T] private[sql]( SchemaUtils.checkColumnNameDuplication( projectList.map(_.name), sparkSession.sessionState.conf.caseSensitiveAnalysis) - withPlan(Project(projectList, logicalPlan)) + withPlan( + (logicalPlan, projectList) match { + case EasilyFlattenable(flattendPlan) if !this.isStreaming && + !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan + + case _ => Project(projectList, logicalPlan) + } + ) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 527b3b9fb8aa..a626ce60acb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -25,11 +25,15 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} private[sql] object EasilyFlattenable { + object OpType extends Enumeration { + type OpType = Value + val AddNewColumnsOnly, RemapOnly, Unknown = Value + } + def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { case p@Project(projList, child) => - // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { @@ -39,95 +43,111 @@ private[sql] object EasilyFlattenable { val currentOutputAttribs = AttributeSet(p.output) val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall( currentOutputAttribs.contains) - if (passThruAttribs.size == currentOutputAttribs.size && - passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.nonEmpty) { - // case of new columns being added only - val attribsReassignedInProj = projList.filter(ne => ne match { - case _: AttributeReference => false - case _ => true - }).map(_.name).toSet.intersect(child.output.map(_.name).toSet) - - if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { + val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, + passThruAttribsContainedInCurrentOutput) + + opType match { + case OpType.AddNewColumnsOnly => + // case of new columns being added only + val attribsReassignedInProj = projList.filter(ne => ne match { + case _: AttributeReference => false + case _ => true + }).map(_.name).toSet.intersect(child.output.map(_.name).toSet) + + if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { case attr: AttributeReference => attribsReassignedInProj.contains(attr.name) case u: UnresolvedAttribute => if (u.nameParts.size > 1) { - true - } else { - attribsReassignedInProj.contains(u.name) - } - } || ne.collectFirst{ - case ex if !ex.deterministic => ex - case ex if ex.isInstanceOf[UserDefinedExpression] => ex - case 
u: UnresolvedAttribute if u.nameParts.size != 1 => u - case u: UnresolvedAlias => u - case u : UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u - }.nonEmpty)) { - None - } else { - val remappedNewProjListResult = Try { - newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - case anyOtherExpr => - (anyOtherExpr transformUp { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - case al: Alias => al.child - case x => x - }.getOrElse(attr) - - case u: UnresolvedAttribute => projList.find( - _.toAttribute.name.equalsIgnoreCase(u.name)).map { - case al: Alias => al.child - case u: UnresolvedAttribute => - throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $u") - case x => x - }.getOrElse(throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $u")) - }).asInstanceOf[NamedExpression] + true + } else { + attribsReassignedInProj.contains(u.name) + } + } || ne.collectFirst { + case ex if !ex.deterministic => ex + case ex if ex.isInstanceOf[UserDefinedExpression] => ex + case u: UnresolvedAttribute if u.nameParts.size != 1 => u + case u: UnresolvedAlias => u + case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => + u + }.nonEmpty)) { + None + } else { + val remappedNewProjListResult = Try { + newProjList.map { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + case anyOtherExpr => + (anyOtherExpr transformUp { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).map { + case al: Alias => al.child + case x => x + }.getOrElse(attr) + + case u: UnresolvedAttribute => projList.find( + _.toAttribute.name.equalsIgnoreCase(u.name)).map { + case al: Alias => al.child + case u: UnresolvedAttribute => + throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $u") + case x => x + }.getOrElse(throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $u")) + }).asInstanceOf[NamedExpression] + } + } + remappedNewProjListResult match { + case Success(remappedNewProjList) => + val newProj = p.copy(projectList = remappedNewProjList) + Option(newProj) + case Failure(_) => None } } - remappedNewProjListResult match { - case Success(remappedNewProjList) => - val newProj = p.copy(projectList = remappedNewProjList) - Option(newProj) - case Failure(_) => None + + case OpType.RemapOnly => + // case of renaming of columns + val remappedNewProjListResult = newProjList.map { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).get + + case al@Alias(ar: AttributeReference, name) => + projList.find( + _.toAttribute.canonicalized == ar.canonicalized).map { + case alx: Alias => alx.copy(name = name)(exprId = al.exprId, + qualifier = al.qualifier, explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys) + + case _: AttributeReference => al + }.get } - } - } else if (passThruAttribs.size + tinkeredOrNewNamedExprs.size == currentOutputAttribs.size - && passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.forall( - ne => ne match { - case Alias(x: AttributeReference, _) => - val toCheck = projList.find(_.toAttribute.canonicalized == x.canonicalized).get - toCheck match { 
- case _: AttributeReference => true - case al: Alias => passThruAttribs.forall( - _.toAttribute.canonicalized != al.toAttribute.canonicalized ) - } - case _ => false - })) { - // case of renaming of columns - val remappedNewProjListResult = newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).get - - case al @ Alias(ar: AttributeReference, name) => - projList.find( - _.toAttribute.canonicalized == ar.canonicalized).map { - case alx: Alias => alx.copy(name = name)(exprId = al.exprId, - qualifier = al.qualifier, explicitMetadata = al.explicitMetadata, - nonInheritableMetadataKeys = al.nonInheritableMetadataKeys) - - case _: AttributeReference => al - }.get - } - - Option(p.copy(projectList = remappedNewProjListResult)) - } else { - None + + Option(p.copy(projectList = remappedNewProjListResult)) + + case _ => None } case _ => None } } + + private def identifyOp( + passThruAttribs: Seq[NamedExpression], + currentOutputAttribs: AttributeSet, + tinkeredOrNewNamedExprs: Seq[NamedExpression], + passThruAttribsContainedInCurrentOutput: Boolean + ): OpType.OpType = { + + if (passThruAttribs.size == currentOutputAttribs.size && + passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.nonEmpty) { + OpType.AddNewColumnsOnly + } else if (passThruAttribs.size + tinkeredOrNewNamedExprs.size == currentOutputAttribs.size + && passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.forall { + case Alias(_: AttributeReference, _) => true + + case _ => false + }) { + OpType.RemapOnly + } else { + OpType.Unknown + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index fd28a4e522ea..fffec2a376ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -42,7 +42,7 @@ class AddColumnsFlattenSuite extends QueryTest } test("withColumns: check no new project addition if redefined alias is not used in" + - "new columns") { + " new columns") { val testDf = spark.range(1).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") val initNodes = testDf.queryExecution.logical.collect { @@ -72,7 +72,8 @@ class AddColumnsFlattenSuite extends QueryTest assert(newNodes.size === initNodes.size + 1) } - test("withColumns: remap of column should result in project addition") { + test("withColumns: remap of column should not result in new project if the source of remap" + + "is not used in other cols") { val testDf = spark.range(1).select($"id" as "a", $"id" as "b") val initNodes = testDf.queryExecution.logical.collect { case l => l @@ -85,5 +86,70 @@ class AddColumnsFlattenSuite extends QueryTest } assert(newNodes.size === initNodes.size) } + + test("withColumns: remap of column should not result in new project if the source of remap is" + + "an attribute used in other cols") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b") + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumnRenamed("a", "d") + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size ) + } + + test("withColumns: remap of column should not result in new project if the remap" + + " is on an alias") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). 
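+      // A sketch of the fold this test expects: renaming "a" to "d" while "a"
+      // feeds the alias "c" should rewrite the top Project in place, roughly
+      //   Project [a + 1 AS c, a, b]             Project [a + 1 AS c, a AS d, b]
+      //     Project [id AS a, id AS b]    ==>      Project [id AS a, id AS b]
+      // rather than stacking a third Project on top of it.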
+ select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumnRenamed("d", "x") + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size) + } + + test("withColumns: remap of column should not result in new project if the remap" + + " source an alias and that attribute is also projected as another attribute") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). + select($"c", $"a", $"b", $"d", $"d" as "k") + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumnRenamed("d", "x") + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size) + } + + test("withColumns: test multi column remap") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u")) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size) + } } From dd0d168778fd809a46948a494293c88ff5599223 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 28 Nov 2023 15:09:13 -0800 Subject: [PATCH 051/129] SPARK-45959 --- .../spark/sql/AddColumnsFlattenSuite.scala | 54 ++++++++++++++++--- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index fffec2a376ac..ad0d4a353128 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession @@ -72,8 +73,8 @@ class AddColumnsFlattenSuite extends QueryTest assert(newNodes.size === initNodes.size + 1) } - test("withColumns: remap of column should not result in new project if the source of remap" + - "is not used in other cols") { + test("withColumnRenamed: remap of column should not result in new project if the source" + + " of remap is not used in other cols") { val testDf = spark.range(1).select($"id" as "a", $"id" as "b") val initNodes = testDf.queryExecution.logical.collect { case l => l @@ -87,8 +88,8 @@ class AddColumnsFlattenSuite extends QueryTest assert(newNodes.size === initNodes.size) } - test("withColumns: remap of column should not result in new project if the source of remap is" + - "an attribute used in other cols") { + test("withColumnRenamed: remap of column should not result in new project if the source" + + " of remap is an attribute used in other cols") { val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b") val initNodes = testDf.queryExecution.logical.collect { @@ -103,7 +104,7 @@ class AddColumnsFlattenSuite extends QueryTest assert(newNodes.size === initNodes.size ) } - test("withColumns: remap of column should not result in new project if the remap" + + test("withColumnRenamed: remap of column should not result in new project if the remap" + " is on an alias") { val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) @@ -119,7 +120,7 @@ class AddColumnsFlattenSuite extends QueryTest assert(newNodes.size === initNodes.size) } - test("withColumns: remap of column should not result in new project if the remap" + + test("withColumnRenamed: remap of column should not result in new project if the remap" + " source an alias and that attribute is also projected as another attribute") { val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). @@ -136,7 +137,7 @@ class AddColumnsFlattenSuite extends QueryTest assert(newNodes.size === initNodes.size) } - test("withColumns: test multi column remap") { + test("withColumnRenamed: test multi column remap") { val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") @@ -151,5 +152,44 @@ class AddColumnsFlattenSuite extends QueryTest } assert(newNodes.size === initNodes.size) } + + test("withColumns: test multi column addition") { + val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + + val newDf = testDf.withColumns( + Seq("newCol1", "newCol2", "newCol3", "newCol4"), + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")), + ) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size) + } + + test("use of cached inmemory relation when new columns added do not result in new project") { + val testDf = spark.range(100).select($"id" as "a", $"id" as "b"). 
+ select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + testDf.cache() + val newDf = testDf.withColumns( + Seq("newCol1", "newCol2", "newCol3", "newCol4"), + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")), + ) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size) + assert(newDf.queryExecution.optimizedPlan.collectLeaves().head.isInstanceOf[InMemoryRelation]) + } } From 59bddebe6a505c897f7e8c1efd7bc806ab2dc26f Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 28 Nov 2023 21:14:19 -0800 Subject: [PATCH 052/129] SPARK-45959 --- .../spark/sql/execution/CacheManager.scala | 39 +++++++++++++++++-- .../spark/sql/AddColumnsFlattenSuite.scala | 22 ++++++++++- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 7897ec1989a6..d8ad2743aaed 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -23,7 +23,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, NamedExpression, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, NamedExpression, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, Project, ResolvedHint, SubqueryAlias, UnaryNode, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION @@ -293,6 +293,20 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { lookupCachedData(query.queryExecution.normalized) } + + /* + Partial match cases: + InComingPlan (case of add cols) cached plan InComing Plan ( case of rename) + Project P2 Project P1 Project P2 + attr1 attr1 attr1 + attr2 attr2 Alias2'(x, attr2) + Alias3 Alias3 Alias3'(y, Alias3-childExpr) + Alias4 Alias4 Alias4'(z, Alias4-childExpr) + Alias5 (k, f(attr1, attr2, al3, al4) + Alias6 (p, f(attr1, attr2, al3, al4) + */ + + /** Optionally returns cached data for the given [[LogicalPlan]]. */ def lookupCachedData(plan: LogicalPlan): Option[CachedData] = { val fullMatch = cachedData.find(cd => plan.sameResult(cd.plan)) @@ -309,10 +323,29 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val cdPlanProject = cachedPlan.asInstanceOf[Project] val canonicalizedInProj = incomingProject.canonicalized.asInstanceOf[Project] val canonicalizedCdProj = cdPlanProject.canonicalized.asInstanceOf[Project] - // index if -1 indicates it is created out of existing output attribs + // matchIndexInCdPlanProj remains -1 in the end, itindicates it is + // new cols created out of existing output attribs val (equivMapping, inComingProjNeedingMod) = canonicalizedInProj.projectList. 
        zipWithIndex.map {
-        case (ne, index) => index -> canonicalizedCdProj.projectList.indexWhere(_ == ne)
+        case (inComingNE, index) =>
+          // first check for equivalent named expressions; if the index is != -1,
+          // it is a pass-thru Alias or a pass-thru Attribute
+          var matchIndexInCdPlanProj =
+            canonicalizedCdProj.projectList.indexWhere(_ == inComingNE)
+          if (matchIndexInCdPlanProj == -1) {
+            // check if it is a case of rename:
+            inComingNE match {
+              case Alias(attrx: AttributeReference, _) =>
+                matchIndexInCdPlanProj =
+                  canonicalizedCdProj.projectList.indexWhere(_ == attrx)
+              case Alias(childExpr, _) => matchIndexInCdPlanProj =
+                canonicalizedCdProj.projectList.indexWhere(
+                  _.children.headOption.map(_ == childExpr).getOrElse(false))
+            }
+          }
+
+          index -> matchIndexInCdPlanProj
+
       }.partition(_._2 != -1)
 
       val cdAttribToInAttrib = equivMapping.map {

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala
index ad0d4a353128..247854e6b0e8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala
@@ -163,7 +163,7 @@ class AddColumnsFlattenSuite extends QueryTest
 
     val newDf = testDf.withColumns(
       Seq("newCol1", "newCol2", "newCol3", "newCol4"),
-      Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")),
+      Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))
     )
 
     val newNodes = newDf.queryExecution.logical.collect {
@@ -182,7 +182,7 @@ class AddColumnsFlattenSuite extends QueryTest
     testDf.cache()
     val newDf = testDf.withColumns(
       Seq("newCol1", "newCol2", "newCol3", "newCol4"),
-      Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")),
+      Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))
     )
 
     val newNodes = newDf.queryExecution.logical.collect {
@@ -191,5 +191,23 @@ class AddColumnsFlattenSuite extends QueryTest
     assert(newNodes.size === initNodes.size)
     assert(newDf.queryExecution.optimizedPlan.collectLeaves().head.isInstanceOf[InMemoryRelation])
   }
+
+  test("use of cached inmemory relation when renamed columns do not result in new project") {
+    val testDf = spark.range(100).select($"id" as "a", $"id" as "b").
+ select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + + val initNodes = testDf.queryExecution.logical.collect { + case l => l + } + testDf.cache() + val newDf = testDf.withColumnsRenamed( + Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1")) + + val newNodes = newDf.queryExecution.logical.collect { + case l => l + } + assert(newNodes.size === initNodes.size) + assert(newDf.queryExecution.optimizedPlan.collectLeaves().head.isInstanceOf[InMemoryRelation]) + } } From 94155f6257d308cb8008c74a524741d97f5b5bfa Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 29 Nov 2023 13:15:54 -0800 Subject: [PATCH 053/129] SPARK-45959 --- .../spark/sql/AddColumnsFlattenSuite.scala | 261 +++++++++--------- .../org/apache/spark/sql/QueryTest.scala | 3 +- 2 files changed, 131 insertions(+), 133 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index 247854e6b0e8..40712e89458d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -17,9 +17,10 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryRelation -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.functions.col import org.apache.spark.sql.test.SharedSparkSession @@ -28,96 +29,83 @@ class AddColumnsFlattenSuite extends QueryTest import testImplicits._ test("withColumns: check no new project addition for simple columns addition") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b") - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumns(Seq("newCol1", "newCol2"), - Seq(col("a") + 1, col("b") + 2)) - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(initNodes.size === newNodes.size) + val testDf = spark.range(10).select($"id" as "a", $"id" as "b") + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumns: check no new project addition if redefined alias is not used in" + " new columns") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + val testDf = spark.range(10).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumns(Seq("newCol1"), Seq(col("b") + 2)) - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(initNodes.size === newNodes.size) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumns: new project 
addition if redefined alias is used in new columns") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + val testDf = spark.range(10).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumns(Seq("newCol1"), Seq(col("a") + 2)) - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size + 1) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size + 1 === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is not used in other cols") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b") - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumnRenamed("a", "c") - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size) + val testDf = spark.range(10).select($"id" as "a", $"id" as "b") + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumnRenamed("a", "c")) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is an attribute used in other cols") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b") - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumnRenamed("a", "d") - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size ) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumnRenamed("a", "d")) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumnRenamed: remap of column should not result in new project if the remap" + " is on an alias") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). 
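+    // getComparableDataFrames (defined at the bottom of this suite) applies the
+    // same transformation twice: once normally and once with
+    // LogicalPlan.SKIP_FLATTENING set on the input plan, so the unoptimized
+    // variant keeps the extra Project. Hence the node counts are expected to
+    // differ by exactly one.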
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumnRenamed("d", "x") - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumnRenamed("d", "x")) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumnRenamed: remap of column should not result in new project if the remap" + @@ -125,89 +113,98 @@ class AddColumnsFlattenSuite extends QueryTest val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). select($"c", $"a", $"b", $"d", $"d" as "k") - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumnRenamed("d", "x") - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumnRenamed("d", "x")) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumnRenamed: test multi column remap") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u")) - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u"))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("withColumns: test multi column addition") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } - - val newDf = testDf.withColumns( - Seq("newCol1", "newCol2", "newCol3", "newCol4"), - Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")) - ) - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumns( + Seq("newCol1", "newCol2", "newCol3", "newCol4"), + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) } test("use of cached inmemory relation when new columns added do not result in new project") { - val testDf = spark.range(100).select($"id" as "a", $"id" as "b"). + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } testDf.cache() - val newDf = testDf.withColumns( - Seq("newCol1", "newCol2", "newCol3", "newCol4"), - Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")) - ) - - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size) - assert(newDf.queryExecution.optimizedPlan.collectLeaves().head.isInstanceOf[InMemoryRelation]) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumns( + Seq("newCol1", "newCol2", "newCol3", "newCol4"), + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")) + )) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) + assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. + isInstanceOf[InMemoryRelation]) } test("use of cached inmemory relation when renamed columns do not result in new project") { - val testDf = spark.range(100).select($"id" as "a", $"id" as "b"). + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - - val initNodes = testDf.queryExecution.logical.collect { - case l => l - } testDf.cache() - val newDf = testDf.withColumnsRenamed( - Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1")) + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumnsRenamed( + Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1"))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) + assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. 
+ isInstanceOf[InMemoryRelation]) + } - val newNodes = newDf.queryExecution.logical.collect { - case l => l - } - assert(newNodes.size === initNodes.size) - assert(newDf.queryExecution.optimizedPlan.collectLeaves().head.isInstanceOf[InMemoryRelation]) + private def getComparableDataFrames( + baseDf: DataFrame, + transformation: DataFrame => DataFrame): (DataFrame, DataFrame) = { + // first obtain optimized transformation which avoids adding new project + val newDfOpt = transformation(baseDf) + // then obtain optimized transformation which adds new project + val logicalPlan = baseDf.logicalPlan + val newDfUnopt = try { + logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_FLATTENING, true) + transformation(baseDf) + } finally { + logicalPlan.unsetTagValue(LogicalPlan.SKIP_FLATTENING) + } + (newDfOpt, newDfUnopt) + } + + private def collectNodes(df: DataFrame): Seq[LogicalPlan] = df.queryExecution.logical.collect { + case l => l } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index f5ba655e3e85..df4f443b0e22 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -236,7 +236,8 @@ abstract class QueryTest extends PlanTest { def assertEmptyMissingInput(query: Dataset[_]): Unit = { assert(query.queryExecution.analyzed.missingInput.isEmpty, s"The analyzed logical plan has missing inputs:\n${query.queryExecution.analyzed}") - assert(query.queryExecution.optimizedPlan.missingInput.isEmpty, + assert(query.queryExecution.optimizedPlan.children.isEmpty || + query.queryExecution.optimizedPlan.missingInput.isEmpty, s"The optimized logical plan has missing inputs:\n${query.queryExecution.optimizedPlan}") assert(query.queryExecution.executedPlan.missingInput.isEmpty, s"The physical plan has missing inputs:\n${query.queryExecution.executedPlan}") From a5e3f097742f8ce29700f06bda7ed4ef508815ce Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 29 Nov 2023 13:30:54 -0800 Subject: [PATCH 054/129] SPARK-45959 --- .../scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index 40712e89458d..82ef950c27b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -151,7 +151,7 @@ class AddColumnsFlattenSuite extends QueryTest checkAnswer(newDfOpt, newDfUnopt) } - test("use of cached inmemory relation when new columns added do not result in new project") { + test("use of cached InMemoryRelation when new columns added do not result in new project") { val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") testDf.cache() @@ -170,7 +170,7 @@ class AddColumnsFlattenSuite extends QueryTest isInstanceOf[InMemoryRelation]) } - test("use of cached inmemory relation when renamed columns do not result in new project") { + test("use of cached InMemoryRelation when renamed columns do not result in new project") { val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") testDf.cache() From 89433d994bf8943ffbff95070ef55ce27a001af1 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 11 Dec 2023 13:55:38 -0800 Subject: [PATCH 055/129] SPARK-45959. Removing some checks and limitation --- .../sql/internal/EasilyFlattenable.scala | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index a626ce60acb2..e2bbe50d7efa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, Expression, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} @@ -49,23 +49,22 @@ private[sql] object EasilyFlattenable { opType match { case OpType.AddNewColumnsOnly => // case of new columns being added only - val attribsReassignedInProj = projList.filter(ne => ne match { - case _: AttributeReference => false - case _ => true - }).map(_.name).toSet.intersect(child.output.map(_.name).toSet) - - if (tinkeredOrNewNamedExprs.exists(ne => ne.references.exists { - case attr: AttributeReference => attribsReassignedInProj.contains(attr.name) - case u: UnresolvedAttribute => if (u.nameParts.size > 1) { - true + val childOutput = child.output.map(_.name).toSet + val attribsRemappedInProj = projList.flatMap(ne => ne match { + case _: AttributeReference => Seq.empty[(String, Expression)] + + case Alias(expr, name) => if (childOutput.contains(name)) { + Seq(name -> expr) } else { - attribsReassignedInProj.contains(u.name) + Seq.empty[(String, Expression)] } - } || ne.collectFirst { + + case _ => Seq.empty[(String, Expression)] + }).toMap + + if (tinkeredOrNewNamedExprs.exists(_.collectFirst { case ex if !ex.deterministic => ex - case ex if ex.isInstanceOf[UserDefinedExpression] => ex case u: UnresolvedAttribute if u.nameParts.size != 1 => u - case u: UnresolvedAlias => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -77,20 +76,21 @@ private[sql] object EasilyFlattenable { _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) case anyOtherExpr => (anyOtherExpr transformUp { - case attr: AttributeReference => projList.find( + case attr: AttributeReference => attribsRemappedInProj.get(attr.name).orElse( + projList.find( _.toAttribute.canonicalized == attr.canonicalized).map { case al: Alias => al.child case x => x - }.getOrElse(attr) + }).getOrElse(attr) - case u: UnresolvedAttribute => projList.find( - _.toAttribute.name.equalsIgnoreCase(u.name)).map { + case u: UnresolvedAttribute => attribsRemappedInProj.get(u.name).orElse( + projList.find( _.toAttribute.name.equalsIgnoreCase(u.name)).map { case al: Alias => al.child case u: UnresolvedAttribute => throw new UnsupportedOperationException("Not able to flatten" 
+ s" unresolved attribute $u") case x => x - }.getOrElse(throw new UnsupportedOperationException("Not able to flatten" + + }).getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $u")) }).asInstanceOf[NamedExpression] } From 567cf2c0faf44fd6245db1c8c55877888993e13b Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 11 Dec 2023 14:25:08 -0800 Subject: [PATCH 056/129] SPARK-45959. formatting failure fix --- .../sql/connect/planner/SparkConnectPlanner.scala | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index e3998f99338c..7cb6b5ac7ddc 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -985,18 +985,14 @@ class SparkConnectPlanner( val (colNames, newColNames) = rel.getRenamesList.asScala.toSeq.map { rename => (rename.getColName, rename.getNewColName) }.unzip - val ds = Dataset - .ofRows(session, transformRelation(rel.getInput)) + val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) ds.logicalPlan.setTagValue(LogicalPlan.SKIP_FLATTENING, true) - ds.withColumnsRenamed(colNames, newColNames) - .logicalPlan + ds.withColumnsRenamed(colNames, newColNames).logicalPlan } else { // for backward compatibility - val ds = Dataset - .ofRows(session, transformRelation(rel.getInput)) + val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) ds.logicalPlan.setTagValue(LogicalPlan.SKIP_FLATTENING, true) - ds.withColumnsRenamed(rel.getRenameColumnsMapMap) - .logicalPlan + ds.withColumnsRenamed(rel.getRenameColumnsMapMap).logicalPlan } } From ad3e950414a1328fad7a8cfef07d56eb958eafa3 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 11 Dec 2023 15:31:17 -0800 Subject: [PATCH 057/129] SPARK-45959. 
fix test failure --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index e2bbe50d7efa..3beda9db3e92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, Expression, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} @@ -64,6 +64,7 @@ private[sql] object EasilyFlattenable { if (tinkeredOrNewNamedExprs.exists(_.collectFirst { case ex if !ex.deterministic => ex + case ex if ex.isInstanceOf[UserDefinedExpression] => ex case u: UnresolvedAttribute if u.nameParts.size != 1 => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u From e57fafffa68a6dbe117e528743687cbd664a8326 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 11 Dec 2023 16:08:35 -0800 Subject: [PATCH 058/129] SPARK-45959. fix test failure --- .../spark/sql/AddColumnsFlattenSuite.scala | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index 82ef950c27b1..5f1b8d6074b9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -54,7 +54,7 @@ class AddColumnsFlattenSuite extends QueryTest checkAnswer(newDfOpt, newDfUnopt) } - test("withColumns: new project addition if redefined alias is used in new columns") { + test("withColumns: no new project addition if redefined alias is used in new columns - 1") { val testDf = spark.range(10).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") val initNodes = collectNodes(testDf) @@ -62,8 +62,22 @@ class AddColumnsFlattenSuite extends QueryTest df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2))) val optDfNodes = collectNodes(newDfOpt) val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size + 1 === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) + } + + test("withColumns: no new project addition if redefined alias is used in new columns - 2") { + val testDf = spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b"). 
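+      // "c", "a" and "e" are all redefined by this select, so folding newCol1
+      // (c + 2 + a * e) into the same Project must substitute the alias
+      // children, giving roughly ((c + a) + 2) + ((a + 3) * (a - b)) in terms
+      // of the previous Project's output.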
+ select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) checkAnswer(newDfOpt, newDfUnopt) } From 2fb39ec1a290ac1d7228645bb22f4a73011ada99 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 11 Dec 2023 21:36:48 -0800 Subject: [PATCH 059/129] SPARK-45959. fix test failure --- .../spark/sql/execution/CacheManager.scala | 62 ++++++++++++------- .../spark/sql/AddColumnsFlattenSuite.scala | 19 +++++- 2 files changed, 58 insertions(+), 23 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index d8ad2743aaed..be0183929d25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -22,8 +22,10 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, NamedExpression, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, LeafExpression, NamedExpression, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, Project, ResolvedHint, SubqueryAlias, UnaryNode, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION @@ -33,6 +35,7 @@ import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.DataType import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK import org.apache.spark.util.ArrayImplicits._ @@ -325,7 +328,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val canonicalizedCdProj = cdPlanProject.canonicalized.asInstanceOf[Project] // matchIndexInCdPlanProj remains -1 in the end, itindicates it is // new cols created out of existing output attribs - val (equivMapping, inComingProjNeedingMod) = canonicalizedInProj.projectList. + val (equivMapping, inComingProjNoDirectMapping) = canonicalizedInProj.projectList. 
zipWithIndex.map { case (inComingNE, index) => // first check for equivalent named expressions..if index is != -1, that means @@ -340,7 +343,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { canonicalizedCdProj.projectList.indexWhere(_ == attrx) case Alias(childExpr, _) => matchIndexInCdPlanProj = canonicalizedCdProj.projectList.indexWhere( - _.children.headOption.map(_ == childExpr).getOrElse(false)) + _.children.headOption.exists(_ == childExpr)) } } @@ -348,31 +351,38 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { }.partition(_._2 != -1) + // If expressions of inComingProjNoDirectMapping can be expressed in terms of the + // incoming attribute refs or incoming alias exprs, which can be mapped directly + // to the CachedPlan's output, we are good. so lets transform such indirectly + // mappable named expressions in terms of mappable attributes of the incoming plan + val directlyMappedIncomingProjs = equivMapping.map { + case(incmngIndex, _) => incomingProject.projectList(incmngIndex) + } + val transformedIndirectlyMappableExpr = inComingProjNoDirectMapping.map { + case (incomngIndex, _) => + val ne = incomingProject.projectList(incomngIndex) + val modifiedNe = ne.transformDown { + case expr => directlyMappedIncomingProjs.find(ne => ne.toAttribute == expr + || ne.children.headOption.contains(expr)). + map(ne => Replaceable(ne.toAttribute)).getOrElse(expr) + }.asInstanceOf[NamedExpression] + incomngIndex -> modifiedNe + }.toMap + val cdAttribToInAttrib = equivMapping.map { case (inAttribIndex, cdAttribIndex) => cdPlanProject.projectList(cdAttribIndex).toAttribute -> incomingProject.projectList(inAttribIndex).toAttribute }.toMap if (cdAttribToInAttrib.size == cachedPlan.output.size && - canonicalizedInProj.projectList.map(_.references).reduce(_ ++ _). 
- subsetOf(canonicalizedCdProj.outputSet)) { - val projectionToForceOnCdPlan = cachedPlan.output.map(cdAttribToInAttrib) - val modifiedInProj = incomingProject.projectList.zipWithIndex.map { - case (ne, indx) => if (equivMapping.exists(_._1 == indx)) { - ne.toAttribute - } else { - ne.transformUp { - case attr: Attribute => val indexInChildOutput = - incomingPlan.child.output.indexWhere(_.canonicalized == attr.canonicalized) - val attribInChildCdPlan = cachedPlan.child.output(indexInChildOutput) - val attribInCdPlan = cdPlanProject.projectList.find { - case attr: Attribute => - attr.canonicalized == attribInChildCdPlan.canonicalized - case al: Alias => - al.child.canonicalized == attribInChildCdPlan.canonicalized - }.get.toAttribute - cdAttribToInAttrib.find( - _._1.canonicalized == attribInCdPlan.canonicalized).map(_._2).get + transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { + val projectionToForceOnCdPlan = cachedPlan.output.map(cdAttribToInAttrib) + val modifiedInProj = incomingProject.projectList.zipWithIndex.map { + case (ne, indx) => if (equivMapping.exists(_._1 == indx)) { + ne.toAttribute + } else { + transformedIndirectlyMappableExpr(indx).transformUp { + case Replaceable(attribToUse) => attribToUse }.asInstanceOf[NamedExpression] } } @@ -506,3 +516,11 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } } } + +private case class Replaceable(attribToUse: Attribute) extends LeafExpression { + override def nullable: Boolean = false + override def eval(input: InternalRow): Any = throw new UnsupportedOperationException() + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + throw new UnsupportedOperationException() + override def dataType: DataType = attribToUse.dataType +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index 5f1b8d6074b9..a43012669142 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -165,7 +165,7 @@ class AddColumnsFlattenSuite extends QueryTest checkAnswer(newDfOpt, newDfUnopt) } - test("use of cached InMemoryRelation when new columns added do not result in new project") { + test("use of cached InMemoryRelation when new columns added do not result in new project -1") { val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") testDf.cache() @@ -184,6 +184,23 @@ class AddColumnsFlattenSuite extends QueryTest isInstanceOf[InMemoryRelation]) } + test("use of cached InMemoryRelation when new columns added do not result in new project -2") { + val testDf = spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b"). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + testDf.cache() + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) + assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. 
+ isInstanceOf[InMemoryRelation]) + } + test("use of cached InMemoryRelation when renamed columns do not result in new project") { val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") From c171e0c5ba62013bb680a76ba353be927434ebb4 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 12 Dec 2023 13:42:24 -0800 Subject: [PATCH 060/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../spark/sql/execution/CacheManager.scala | 12 ++++----- .../sql/internal/EasilyFlattenable.scala | 15 ++++++++--- .../spark/sql/AddColumnsFlattenSuite.scala | 26 ++++++++++++++++--- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index be0183929d25..711f35e657be 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -328,8 +328,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val canonicalizedCdProj = cdPlanProject.canonicalized.asInstanceOf[Project] // matchIndexInCdPlanProj remains -1 in the end, itindicates it is // new cols created out of existing output attribs - val (equivMapping, inComingProjNoDirectMapping) = canonicalizedInProj.projectList. - zipWithIndex.map { + val (incomingToCachedPlanIndxMapping, inComingProjNoDirectMapping) = + canonicalizedInProj.projectList.zipWithIndex.map { case (inComingNE, index) => // first check for equivalent named expressions..if index is != -1, that means // it is pass thru Alias or pass thru - Attribute @@ -346,16 +346,14 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { _.children.headOption.exists(_ == childExpr)) } } - index -> matchIndexInCdPlanProj - }.partition(_._2 != -1) // If expressions of inComingProjNoDirectMapping can be expressed in terms of the // incoming attribute refs or incoming alias exprs, which can be mapped directly // to the CachedPlan's output, we are good. 
so lets transform such indirectly // mappable named expressions in terms of mappable attributes of the incoming plan - val directlyMappedIncomingProjs = equivMapping.map { + val directlyMappedIncomingProjs = incomingToCachedPlanIndxMapping.map { case(incmngIndex, _) => incomingProject.projectList(incmngIndex) } val transformedIndirectlyMappableExpr = inComingProjNoDirectMapping.map { @@ -369,7 +367,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { incomngIndex -> modifiedNe }.toMap - val cdAttribToInAttrib = equivMapping.map { + val cdAttribToInAttrib = incomingToCachedPlanIndxMapping.map { case (inAttribIndex, cdAttribIndex) => cdPlanProject.projectList(cdAttribIndex).toAttribute -> incomingProject.projectList(inAttribIndex).toAttribute @@ -378,7 +376,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { val projectionToForceOnCdPlan = cachedPlan.output.map(cdAttribToInAttrib) val modifiedInProj = incomingProject.projectList.zipWithIndex.map { - case (ne, indx) => if (equivMapping.exists(_._1 == indx)) { + case (ne, indx) => if (incomingToCachedPlanIndxMapping.exists(_._1 == indx)) { ne.toAttribute } else { transformedIndirectlyMappableExpr(indx).transformUp { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 3beda9db3e92..2d1950515569 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} @@ -37,12 +37,13 @@ private[sql] object EasilyFlattenable { // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { - case _: AttributeReference => true + case _: Attribute => true case _ => false } val currentOutputAttribs = AttributeSet(p.output) - val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall( - currentOutputAttribs.contains) + val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall( attribute => + currentOutputAttribs.contains(attribute) || + currentOutputAttribs.exists(_.name == attribute.name)) val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, passThruAttribsContainedInCurrentOutput) @@ -75,6 +76,12 @@ private[sql] object EasilyFlattenable { newProjList.map { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + + case ua: UnresolvedAttribute => + projList.find(_.toAttribute.name.equalsIgnoreCase(ua.name)). 
+ getOrElse(throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $ua")) + case anyOtherExpr => (anyOtherExpr transformUp { case attr: AttributeReference => attribsRemappedInProj.get(attr.name).orElse( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index a43012669142..474e0136ef90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -29,7 +29,7 @@ class AddColumnsFlattenSuite extends QueryTest import testImplicits._ test("withColumns: check no new project addition for simple columns addition") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b") + val testDf = spark.range(20).select($"id" as "a", $"id" as "b") val initNodes = collectNodes(testDf) val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2))) @@ -42,7 +42,7 @@ class AddColumnsFlattenSuite extends QueryTest test("withColumns: check no new project addition if redefined alias is not used in" + " new columns") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + val testDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") val initNodes = collectNodes(testDf) val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, @@ -55,7 +55,7 @@ class AddColumnsFlattenSuite extends QueryTest } test("withColumns: no new project addition if redefined alias is used in new columns - 1") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + val testDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") val initNodes = collectNodes(testDf) val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, @@ -124,7 +124,7 @@ class AddColumnsFlattenSuite extends QueryTest test("withColumnRenamed: remap of column should not result in new project if the remap" + " source an alias and that attribute is also projected as another attribute") { - val testDf = spark.range(1).select($"id" as "a", $"id" as "b"). + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). select($"c", $"a", $"b", $"d", $"d" as "k") val initNodes = collectNodes(testDf) @@ -201,6 +201,24 @@ class AddColumnsFlattenSuite extends QueryTest isInstanceOf[InMemoryRelation]) } + test("use of cached InMemoryRelation when new columns added do not result in new project, with" + + "positions changed") { + val testDf = spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b"). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + testDf.cache() + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b")) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) + assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. 
+ isInstanceOf[InMemoryRelation]) + } + test("use of cached InMemoryRelation when renamed columns do not result in new project") { val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") From 431e8c6fa4d8d78c189e64fe1780b5892b2ea98f Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 12 Dec 2023 14:37:55 -0800 Subject: [PATCH 061/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../sql/internal/EasilyFlattenable.scala | 47 ++++++++++++------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 2d1950515569..5d33061de51e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -22,6 +22,7 @@ import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.util.Utils private[sql] object EasilyFlattenable { @@ -105,30 +106,44 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => - val newProj = p.copy(projectList = remappedNewProjList) - Option(newProj) + Option(p.copy(projectList = remappedNewProjList)) + case Failure(_) => None } } case OpType.RemapOnly => // case of renaming of columns - val remappedNewProjListResult = newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).get - - case al@Alias(ar: AttributeReference, name) => - projList.find( - _.toAttribute.canonicalized == ar.canonicalized).map { - case alx: Alias => alx.copy(name = name)(exprId = al.exprId, - qualifier = al.qualifier, explicitMetadata = al.explicitMetadata, - nonInheritableMetadataKeys = al.nonInheritableMetadataKeys) - - case _: AttributeReference => al - }.get + val remappedNewProjListResult = Try { + newProjList.map { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).get + + case ua: UnresolvedAttribute => projList.find( + _.toAttribute.name.equalsIgnoreCase(ua.name)). + getOrElse(throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $ua")) + + case al@Alias(ar: AttributeReference, name) => + projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { + case alx: Alias => alx.copy(name = name)(exprId = al.exprId, + qualifier = al.qualifier, explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys) + + case _: AttributeReference => al + }.get + } } + remappedNewProjListResult match { + case Success(remappedNewProjList) => + Option(p.copy(projectList = remappedNewProjList)) - Option(p.copy(projectList = remappedNewProjListResult)) + case Failure(ex) => if (Utils.isTesting) { + throw ex + } else { + None + } + } case _ => None } From 4096d775b11714f6216a25dd62064527864d127d Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 12 Dec 2023 18:43:19 -0800 Subject: [PATCH 062/129] SPARK-45959. 
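Context for the guard introduced here: merging a projection that contains a window or
aggregate expression into the child Project could change how often, and over which
grouping, the function is evaluated. A minimal sketch of the kind of column that must
keep its own Project (hypothetical names; assumes an active `spark` session with
spark.implicits._ imported):

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions.row_number

    val df = spark.range(10).select($"id" as "a", $"id" % 3 as "g")
    // "rk" is a window expression; collapsing it into the child Project could
    // duplicate or regroup the window computation, so the rewrite bails out.
    val withRank = df.withColumn(
      "rk", row_number().over(Window.partitionBy($"g").orderBy($"a")))
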
added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../apache/spark/sql/internal/EasilyFlattenable.scala | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 5d33061de51e..54390fe9c752 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} - import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.util.Utils @@ -65,8 +65,13 @@ private[sql] object EasilyFlattenable { }).toMap if (tinkeredOrNewNamedExprs.exists(_.collectFirst { + // we will not flatten if expressions contain windows or aggregate as if they + // are collapsed it can cause recalculation of functions and inefficiency with + // separate group by clauses case ex if !ex.deterministic => ex - case ex if ex.isInstanceOf[UserDefinedExpression] => ex + case ex: AggregateExpression => ex + case ex: WindowExpression => ex + case ex: UserDefinedExpression => ex case u: UnresolvedAttribute if u.nameParts.size != 1 => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u From e2fb0cffbbb5c3ded10565c2730cb68ffdae6812 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 12 Dec 2023 18:59:13 -0800 Subject: [PATCH 063/129] SPARK-45959. added new tests. 
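For readers following the series, a rough sketch of the rewrite under test
(hypothetical column names; assumes an active `spark` session with
spark.implicits._ imported):

    import org.apache.spark.sql.functions.col

    val base = spark.range(10).select($"id" as "a", $"id" * 2 as "b")
    // Without the optimization, each withColumn/select stacks a new Project;
    // with it, the added column is merged into the existing Project, so the
    // analyzed plan should stay Project[a, b, c](Range) rather than
    // Project[a, b, c](Project[a, b](Range)).
    val df = base.withColumn("c", col("a") + col("b"))
    df.queryExecution.logical
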
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 54390fe9c752..b973eae1c8d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -18,9 +18,10 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} + import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.util.Utils @@ -170,7 +171,6 @@ private[sql] object EasilyFlattenable { } else if (passThruAttribs.size + tinkeredOrNewNamedExprs.size == currentOutputAttribs.size && passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.forall { case Alias(_: AttributeReference, _) => true - case _ => false }) { OpType.RemapOnly From 89f136eeaeb0c896405748f7ec48be57245093e4 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 12 Dec 2023 22:09:25 -0800 Subject: [PATCH 064/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../spark/sql/internal/EasilyFlattenable.scala | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index b973eae1c8d0..75462c443f7a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -23,8 +23,6 @@ import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFu import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} -import org.apache.spark.util.Utils - private[sql] object EasilyFlattenable { object OpType extends Enumeration { @@ -125,7 +123,7 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case ua: UnresolvedAttribute => projList.find( + case ua: UnresolvedAttribute if ua.nameParts.size == 1 => projList.find( _.toAttribute.name.equalsIgnoreCase(ua.name)). 
getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua")) @@ -138,17 +136,16 @@ private[sql] object EasilyFlattenable { case _: AttributeReference => al }.get + + case x => throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $x") } } remappedNewProjListResult match { case Success(remappedNewProjList) => Option(p.copy(projectList = remappedNewProjList)) - case Failure(ex) => if (Utils.isTesting) { - throw ex - } else { - None - } + case Failure(_) => None } case _ => None From 695f0e93bb749606381d370f8650bf8f6023740c Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 12 Dec 2023 23:57:19 -0800 Subject: [PATCH 065/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 75462c443f7a..48f3726648d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -22,7 +22,7 @@ import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, UnaryNode} private[sql] object EasilyFlattenable { object OpType extends Enumeration { @@ -33,7 +33,7 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p@Project(projList, child) => + case p @ Project(projList, child: UnaryNode) => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { From 0f360f8f97cecf50d5213d739f476213d119eadf Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 13 Dec 2023 01:07:14 -0800 Subject: [PATCH 066/129] SPARK-45959. added new tests. 
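One reason for the extra ambiguity checks added here: a plan can legitimately expose
duplicate column names, and remapping a name-based reference against such output is
not well defined. A hedged sketch (hypothetical):

    val left = spark.range(5).select($"id" as "a")
    val right = spark.range(5).select($"id" as "a")
    // The cross join exposes two columns named "a"; joined.select($"a") would
    // be ambiguous, so a name-keyed flattening rewrite must skip plans whose
    // Project output contains duplicate names.
    val joined = left.crossJoin(right)
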
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 48f3726648d0..2e0dad9d1c50 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -22,7 +22,7 @@ import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, UnaryNode} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} private[sql] object EasilyFlattenable { object OpType extends Enumeration { @@ -33,7 +33,7 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p @ Project(projList, child: UnaryNode) => + case p @ Project(projList, child: LogicalPlan) if child.children.size < 2 => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { From 240e9a33bbd0ffa7b618040b7f58e4fb9a199a25 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 13 Dec 2023 11:09:03 -0800 Subject: [PATCH 067/129] SPARK-45959. added new tests. 
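Note on switching the duplicate-name check to a case-insensitive comparison: with the
default spark.sql.caseSensitive=false, "a" and "A" clash during resolution, so an
exact-string check can miss real conflicts. Sketch (hypothetical):

    val df = spark.range(3).select($"id" as "a", ($"id" + 1) as "A")
    // Under case-insensitive resolution, df.select($"a") raises an ambiguous
    // reference error, so the duplicate-name detection here compares names
    // ignoring case as well.
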
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 2e0dad9d1c50..89d42477a194 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -33,7 +33,7 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p @ Project(projList, child: LogicalPlan) if child.children.size < 2 => + case p @ Project(projList, child: LogicalPlan) => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { @@ -46,7 +46,7 @@ private[sql] object EasilyFlattenable { currentOutputAttribs.exists(_.name == attribute.name)) val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, passThruAttribsContainedInCurrentOutput) - + val ambiguousAttribs = p.output.groupBy(_.name).filter(_._2.size > 1).keys.toSet opType match { case OpType.AddNewColumnsOnly => // case of new columns being added only @@ -71,7 +71,8 @@ private[sql] object EasilyFlattenable { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 => u + case u: UnresolvedAttribute if u.nameParts.size != 1 | + ambiguousAttribs.contains(u.name) => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { From ce266129facaf6afb6958d453af053b1c7f7ffeb Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 13 Dec 2023 12:44:38 -0800 Subject: [PATCH 068/129] SPARK-45959. added new tests. 
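On restricting the matched child pattern in this revision: collapsing the new
projection into an existing Project is only straightforward when the node underneath
cannot complicate resolution. A rough sketch of the distinction, as I read the guard
(hypothetical):

    val overLeaf = spark.range(10).select($"id" as "a")  // Project over a leaf
    val overFilter = overLeaf.filter($"a" > 3).select($"a" + 1 as "c")  // Project over Filter
    // Adding a column on overLeaf is a flattening candidate, while on
    // overFilter the matched Project sits on a Filter, so under this guard the
    // rewrite falls back to adding a fresh Project node.
    overLeaf.withColumn("b", $"a" + 1)
    overFilter.withColumn("d", $"c" * 2)
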
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../apache/spark/sql/internal/EasilyFlattenable.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 89d42477a194..b4f00b27b757 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -72,7 +72,7 @@ private[sql] object EasilyFlattenable { case ex: WindowExpression => ex case ex: UserDefinedExpression => ex case u: UnresolvedAttribute if u.nameParts.size != 1 | - ambiguousAttribs.contains(u.name) => u + ambiguousAttribs.exists(_.equalsIgnoreCase(u.name)) => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -124,7 +124,12 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case ua: UnresolvedAttribute if ua.nameParts.size == 1 => projList.find( + case ua: UnresolvedAttribute if ua.nameParts.size != 1 | + ambiguousAttribs.exists(_.equalsIgnoreCase(ua.name)) => + throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $ua") + + case ua: UnresolvedAttribute => projList.find( _.toAttribute.name.equalsIgnoreCase(ua.name)). getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua")) From 29525fd2fa7a7ba9f204ccf615b30f9e462df506 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 13 Dec 2023 17:28:38 -0800 Subject: [PATCH 069/129] SPARK-45959. added new tests. 
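A related boundary that stays in place throughout the series: only single-part names
are remapped. A multi-part reference may resolve into a struct field rather than a
top-level column, which the rewrite cannot safely inline. Sketch (hypothetical):

    import org.apache.spark.sql.functions.struct

    val df = spark.range(3).select(struct($"id" as "x") as "s")
    // $"s.x" parses to an UnresolvedAttribute with two name parts, which the
    // u.nameParts.size != 1 guard treats as non-flattenable.
    df.select($"s.x")
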
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../spark/sql/internal/EasilyFlattenable.scala | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index b4f00b27b757..48398ab5fc80 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -22,7 +22,8 @@ import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Project} + private[sql] object EasilyFlattenable { object OpType extends Enumeration { @@ -33,7 +34,10 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p @ Project(projList, child: LogicalPlan) => + case p @ Project(projList, child) if (child match { + case _: Project | _: LeafNode => true + case _ => false + }) => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { @@ -46,7 +50,6 @@ private[sql] object EasilyFlattenable { currentOutputAttribs.exists(_.name == attribute.name)) val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, passThruAttribsContainedInCurrentOutput) - val ambiguousAttribs = p.output.groupBy(_.name).filter(_._2.size > 1).keys.toSet opType match { case OpType.AddNewColumnsOnly => // case of new columns being added only @@ -71,8 +74,7 @@ private[sql] object EasilyFlattenable { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 | - ambiguousAttribs.exists(_.equalsIgnoreCase(u.name)) => u + case u: UnresolvedAttribute if u.nameParts.size != 1 => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -124,8 +126,7 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case ua: UnresolvedAttribute if ua.nameParts.size != 1 | - ambiguousAttribs.exists(_.equalsIgnoreCase(ua.name)) => + case ua: UnresolvedAttribute if ua.nameParts.size != 1 => throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua") From bc5df2362395e30ccdc9ae26ddd62ede435aa8e5 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 13 Dec 2023 21:04:07 -0800 Subject: [PATCH 070/129] SPARK-45959. added new tests. 
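Also worth spelling out the long-standing !ex.deterministic bail-out: substituting
expressions through aliases during flattening can duplicate a non-deterministic call
and change results, so such columns are conservatively left on their own Project.
Sketch (hypothetical):

    import org.apache.spark.sql.functions.rand

    val df = spark.range(5).select($"id" as "a")
    // rand() is non-deterministic, so this projection keeps its own Project
    // instead of being merged into the child.
    val df2 = df.withColumn("r", rand())
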
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../spark/sql/internal/EasilyFlattenable.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 48398ab5fc80..0a590a301789 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -22,8 +22,7 @@ import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Project} - +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} private[sql] object EasilyFlattenable { object OpType extends Enumeration { @@ -34,10 +33,7 @@ private[sql] object EasilyFlattenable { def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { val (logicalPlan, newProjList) = tuple logicalPlan match { - case p @ Project(projList, child) if (child match { - case _: Project | _: LeafNode => true - case _ => false - }) => + case p @ Project(projList, child: LogicalPlan) => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { @@ -50,6 +46,7 @@ private[sql] object EasilyFlattenable { currentOutputAttribs.exists(_.name == attribute.name)) val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, passThruAttribsContainedInCurrentOutput) + val ambiguousAttribs = p.output.groupBy(_.name).filter(_._2.size > 1).keys.toSet opType match { case OpType.AddNewColumnsOnly => // case of new columns being added only @@ -74,7 +71,9 @@ private[sql] object EasilyFlattenable { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 => u + case attr: AttributeReference if ambiguousAttribs.contains(attr.name) => true + case u: UnresolvedAttribute if u.nameParts.size != 1 | + ambiguousAttribs.exists(_.equalsIgnoreCase(u.name)) => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -126,7 +125,8 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case ua: UnresolvedAttribute if ua.nameParts.size != 1 => + case ua: UnresolvedAttribute if ua.nameParts.size != 1 | + ambiguousAttribs.exists(_.equalsIgnoreCase(ua.name)) => throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua") From 2d2fb3e7a64d421195236ee991610d6ac42ae6a5 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 13 Dec 2023 22:23:39 -0800 Subject: [PATCH 071/129] SPARK-45959. added new tests. 
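A subtle case the remapping has to respect: a child Project can redefine one of its
input names, e.g. select($"a" + 1 as "a", $"b"). A later expression mentioning "a"
must be rewritten to the redefined expression, not the child's original attribute;
that is what the attribsRemappedInProj bookkeeping tracks. Sketch (hypothetical,
mirroring the suite's tests):

    val base = spark.range(5).select($"id" as "a", $"id" as "b")
    val redef = base.select($"a" + 1 as "a", $"b")
    // When this is flattened, "c" must be computed as (a + 1) * 10 against
    // base's output, since "a" was redefined by the intermediate select.
    val out = redef.withColumn("c", $"a" * 10)
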
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../apache/spark/sql/internal/EasilyFlattenable.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 0a590a301789..4674155c7a7a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -71,9 +71,9 @@ private[sql] object EasilyFlattenable { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case attr: AttributeReference if ambiguousAttribs.contains(attr.name) => true + case attr: AttributeReference if ambiguousAttribs.contains(attr.name) => attr case u: UnresolvedAttribute if u.nameParts.size != 1 | - ambiguousAttribs.exists(_.equalsIgnoreCase(u.name)) => u + ambiguousAttribs.contains(u.name) => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -82,7 +82,10 @@ private[sql] object EasilyFlattenable { val remappedNewProjListResult = Try { newProjList.map { case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + _.toAttribute.canonicalized == attr.canonicalized).map { + case _: Attribute => attr + case x => x + }.getOrElse(attr) case ua: UnresolvedAttribute => projList.find(_.toAttribute.name.equalsIgnoreCase(ua.name)). @@ -94,6 +97,7 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => attribsRemappedInProj.get(attr.name).orElse( projList.find( _.toAttribute.canonicalized == attr.canonicalized).map { + case _: Attribute => attr case al: Alias => al.child case x => x }).getOrElse(attr) From 580180fe5f5d6e79eec057edad335eea0f78fa7d Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 14 Dec 2023 00:49:23 -0800 Subject: [PATCH 072/129] SPARK-45959. added new tests. 
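Motivation for the MetadataBuilder merges in this patch, as I understand it: when an
attribute reference is replaced by the alias (or the alias's child expression) that
defines it in the child Project, metadata attached to either side should survive the
rewrite. Sketch (hypothetical):

    import org.apache.spark.sql.types.MetadataBuilder

    val md = new MetadataBuilder().putString("comment", "demo").build()
    val df = spark.range(3).select(($"id" + 1).as("a", md))
    // If withColumn("b", $"a" * 2) is flattened by inlining a's defining
    // expression, the "comment" metadata on "a" should not be dropped.
    val df2 = df.withColumn("b", $"a" * 2)
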
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../sql/internal/EasilyFlattenable.scala | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 4674155c7a7a..9df1e7536d3d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -20,9 +20,10 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.types.MetadataBuilder private[sql] object EasilyFlattenable { object OpType extends Enumeration { @@ -46,21 +47,20 @@ private[sql] object EasilyFlattenable { currentOutputAttribs.exists(_.name == attribute.name)) val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, passThruAttribsContainedInCurrentOutput) - val ambiguousAttribs = p.output.groupBy(_.name).filter(_._2.size > 1).keys.toSet opType match { case OpType.AddNewColumnsOnly => // case of new columns being added only val childOutput = child.output.map(_.name).toSet val attribsRemappedInProj = projList.flatMap(ne => ne match { - case _: AttributeReference => Seq.empty[(String, Expression)] + case _: AttributeReference => Seq.empty[(String, Alias)] - case Alias(expr, name) => if (childOutput.contains(name)) { - Seq(name -> expr) + case al@ Alias(_, name) => if (childOutput.contains(name)) { + Seq(name -> al) } else { - Seq.empty[(String, Expression)] + Seq.empty[(String, Alias)] } - case _ => Seq.empty[(String, Expression)] + case _ => Seq.empty[(String, Alias)] }).toMap if (tinkeredOrNewNamedExprs.exists(_.collectFirst { @@ -71,9 +71,7 @@ private[sql] object EasilyFlattenable { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case attr: AttributeReference if ambiguousAttribs.contains(attr.name) => attr - case u: UnresolvedAttribute if u.nameParts.size != 1 | - ambiguousAttribs.contains(u.name) => u + case u: UnresolvedAttribute if u.nameParts.size != 1 => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -84,7 +82,11 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).map { case _: Attribute => attr - case x => x + case al: Alias => + val md = new MetadataBuilder().withMetadata(attr.metadata). 
+ withMetadata(al.metadata).build() + al.copy(al.child, al.name)(al.exprId, al.qualifier, Option(md), + al.nonInheritableMetadataKeys) }.getOrElse(attr) case ua: UnresolvedAttribute => @@ -94,11 +96,21 @@ private[sql] object EasilyFlattenable { case anyOtherExpr => (anyOtherExpr transformUp { - case attr: AttributeReference => attribsRemappedInProj.get(attr.name).orElse( + case attr: AttributeReference => attribsRemappedInProj.get(attr.name). + map(al => { + val md = new MetadataBuilder().withMetadata(attr.metadata). + withMetadata(al.metadata).build() + al.copy(al.child, al.name)(al.exprId, al.qualifier, Option(md), + al.nonInheritableMetadataKeys) + }).orElse( projList.find( _.toAttribute.canonicalized == attr.canonicalized).map { case _: Attribute => attr - case al: Alias => al.child + case al: Alias => + val md = new MetadataBuilder().withMetadata(attr.metadata). + withMetadata(al.metadata).build() + al.copy(al.child, al.name)(al.exprId, al.qualifier, Option(md), + al.nonInheritableMetadataKeys) case x => x }).getOrElse(attr) @@ -129,8 +141,7 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case ua: UnresolvedAttribute if ua.nameParts.size != 1 | - ambiguousAttribs.exists(_.equalsIgnoreCase(ua.name)) => + case ua: UnresolvedAttribute if ua.nameParts.size != 1 => throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua") From 61dae8f61f220a0ecb18ecb23c33556ba159e65d Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 14 Dec 2023 11:06:41 -0800 Subject: [PATCH 073/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../apache/spark/sql/internal/EasilyFlattenable.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 9df1e7536d3d..1ac790b6e9ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.types.MetadataBuilder + private[sql] object EasilyFlattenable { object OpType extends Enumeration { type OpType = Value @@ -128,7 +129,9 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => - Option(p.copy(projectList = remappedNewProjList)) + val newProj = p.copy(projectList = remappedNewProjList) + newProj.copyTagsFrom(p) + Option(newProj) case Failure(_) => None } @@ -165,7 +168,9 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => - Option(p.copy(projectList = remappedNewProjList)) + val newProj = p.copy(projectList = remappedNewProjList) + newProj.copyTagsFrom(p) + Option(newProj) case Failure(_) => None } From e086eaa6fe23fa29df641bec1b13862db587f79b Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 14 Dec 2023 17:22:02 -0800 Subject: [PATCH 074/129] SPARK-45959. added new tests. 
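The plumbing added in this patch interacts with Spark's ambiguous-self-join
detection: df("col") stamps Dataset.DATASET_ID_KEY / Dataset.COL_POS_KEY metadata on
the attribute it returns, and with spark.sql.analyzer.failAmbiguousSelfJoin enabled
(the default) the analyzer uses that metadata to disambiguate or reject joins. A
flattening rewrite that replaces those attributes has to re-stamp the metadata,
which, as I read it, is what addDataFrameIdToCol in this change is for. Sketch of the
behavior being preserved (hypothetical):

    val df = spark.range(5).select($"id" as "a")
    val df2 = df.withColumn("b", $"a" + 1)
    // Disambiguating df("a") vs df2("a") in this join leans on the dataset-id
    // metadata that the rewrite must keep intact.
    val joined = df.join(df2, df("a") === df2("a"))
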
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../scala/org/apache/spark/sql/Dataset.scala | 4 +- .../sql/internal/EasilyFlattenable.scala | 114 ++++++++++++------ 2 files changed, 77 insertions(+), 41 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 4d94fe38a669..ef7ea3ff7402 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1574,7 +1574,7 @@ class Dataset[T] private[sql]( case other => other } val newProjList = untypedCols.map(_.named) - (logicalPlan, newProjList) match { + (logicalPlan, newProjList, sparkSession.conf) match { case EasilyFlattenable(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan @@ -2956,7 +2956,7 @@ class Dataset[T] private[sql]( projectList.map(_.name), sparkSession.sessionState.conf.caseSensitiveAnalysis) withPlan( - (logicalPlan, projectList) match { + (logicalPlan, projectList, sparkSession.conf) match { case EasilyFlattenable(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 1ac790b6e9ef..c01e76c4d5cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} +import org.apache.spark.sql.{Dataset, RuntimeConfig} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -26,16 +27,22 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.types.MetadataBuilder + private[sql] object EasilyFlattenable { object OpType extends Enumeration { type OpType = Value val AddNewColumnsOnly, RemapOnly, Unknown = Value } - def unapply(tuple: (LogicalPlan, Seq[NamedExpression])): Option[LogicalPlan] = { - val (logicalPlan, newProjList) = tuple + def unapply(tuple: (LogicalPlan, Seq[NamedExpression], RuntimeConfig)): Option[LogicalPlan] + = { + val (logicalPlan, newProjList, conf) = tuple + logicalPlan match { case p @ Project(projList, child: LogicalPlan) => + val currentDatasetIdOpt = p.getTagValue(Dataset.DATASET_ID_TAG).get.toSet.headOption + val childDatasetIdOpt = child.getTagValue(Dataset.DATASET_ID_TAG).flatMap( + _.toSet.headOption) // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { @@ -55,7 +62,7 @@ private[sql] object EasilyFlattenable { val attribsRemappedInProj = projList.flatMap(ne => ne match { case _: AttributeReference => Seq.empty[(String, Alias)] - case al@ Alias(_, name) => if (childOutput.contains(name)) { + case al @ Alias(_, name) => if (childOutput.contains(name)) { Seq(name -> al) } else { Seq.empty[(String, Alias)] @@ -80,15 +87,17 @@ 
private[sql] object EasilyFlattenable { } else { val remappedNewProjListResult = Try { newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - case _: Attribute => attr - case al: Alias => - val md = new MetadataBuilder().withMetadata(attr.metadata). - withMetadata(al.metadata).build() - al.copy(al.child, al.name)(al.exprId, al.qualifier, Option(md), - al.nonInheritableMetadataKeys) - }.getOrElse(attr) + + case attr: AttributeReference => + val ne = projList.find( + _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) + if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && + currentDatasetIdOpt.contains(attr.metadata.getLong( + Dataset.DATASET_ID_KEY))) { + addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + } else { + ne + } case ua: UnresolvedAttribute => projList.find(_.toAttribute.name.equalsIgnoreCase(ua.name)). @@ -97,23 +106,20 @@ private[sql] object EasilyFlattenable { case anyOtherExpr => (anyOtherExpr transformUp { - case attr: AttributeReference => attribsRemappedInProj.get(attr.name). - map(al => { - val md = new MetadataBuilder().withMetadata(attr.metadata). - withMetadata(al.metadata).build() - al.copy(al.child, al.name)(al.exprId, al.qualifier, Option(md), - al.nonInheritableMetadataKeys) - }).orElse( + case attr: AttributeReference => val ne = + attribsRemappedInProj.get(attr.name).orElse( projList.find( _.toAttribute.canonicalized == attr.canonicalized).map { - case _: Attribute => attr - case al: Alias => - val md = new MetadataBuilder().withMetadata(attr.metadata). - withMetadata(al.metadata).build() - al.copy(al.child, al.name)(al.exprId, al.qualifier, Option(md), - al.nonInheritableMetadataKeys) + case al: Alias => al case x => x }).getOrElse(attr) + if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && + currentDatasetIdOpt.contains(attr.metadata.getLong( + Dataset.DATASET_ID_KEY))) { + addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + } else { + ne + } case u: UnresolvedAttribute => attribsRemappedInProj.get(u.name).orElse( projList.find( _.toAttribute.name.equalsIgnoreCase(u.name)).map { @@ -125,13 +131,12 @@ private[sql] object EasilyFlattenable { }).getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $u")) }).asInstanceOf[NamedExpression] + } } remappedNewProjListResult match { case Success(remappedNewProjList) => - val newProj = p.copy(projectList = remappedNewProjList) - newProj.copyTagsFrom(p) - Option(newProj) + Option(p.copy(projectList = remappedNewProjList)) case Failure(_) => None } @@ -141,12 +146,15 @@ private[sql] object EasilyFlattenable { // case of renaming of columns val remappedNewProjListResult = Try { newProjList.map { - case attr: AttributeReference => projList.find( + case attr: AttributeReference => val ne = projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - - case ua: UnresolvedAttribute if ua.nameParts.size != 1 => - throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $ua") + if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && + currentDatasetIdOpt.contains(attr.metadata.getLong( + Dataset.DATASET_ID_KEY))) { + addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + } else { + ne + } case ua: UnresolvedAttribute => projList.find( _.toAttribute.name.equalsIgnoreCase(ua.name)). 
@@ -154,22 +162,29 @@ private[sql] object EasilyFlattenable { s" unresolved attribute $ua")) case al@Alias(ar: AttributeReference, name) => - projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { - case alx: Alias => alx.copy(name = name)(exprId = al.exprId, - qualifier = al.qualifier, explicitMetadata = al.explicitMetadata, - nonInheritableMetadataKeys = al.nonInheritableMetadataKeys) + val ne = projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { + + case alx : Alias => Alias(alx.child, name)(al.exprId, al.qualifier, + al.explicitMetadata, al.nonInheritableMetadataKeys) case _: AttributeReference => al }.get - + if (ar.metadata.contains(Dataset.DATASET_ID_KEY) && + currentDatasetIdOpt.contains(ar.metadata.getLong( + Dataset.DATASET_ID_KEY))) { + addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + } else { + ne + } case x => throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $x") } } remappedNewProjListResult match { case Success(remappedNewProjList) => + val newProj = p.copy(projectList = remappedNewProjList) - newProj.copyTagsFrom(p) + Option(newProj) case Failure(_) => None @@ -202,4 +217,25 @@ private[sql] object EasilyFlattenable { OpType.Unknown } } + + private def addDataFrameIdToCol( + conf: RuntimeConfig, + expr: NamedExpression, + logicalPlan: LogicalPlan, + childDatasetId: Option[Long]): NamedExpression = + if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) && childDatasetId.nonEmpty) { + val newExpr = expr transform { + case a: AttributeReference + => + val metadata = new MetadataBuilder() + .withMetadata(a.metadata) + .putLong(Dataset.DATASET_ID_KEY, childDatasetId.get) + .putLong(Dataset.COL_POS_KEY, logicalPlan.output.indexWhere(a.semanticEquals)) + .build() + a.withMetadata(metadata) + } + newExpr.asInstanceOf[NamedExpression] + } else { + expr + } } From 73f869afb777fc68cb21cca450cfccfa1ac6a5a8 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 14 Dec 2023 20:48:12 -0800 Subject: [PATCH 075/129] SPARK-45959. added new tests. 
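At this point in the series, the invariant the tests keep checking can be stated
compactly. A hedged, test-style sketch (hypothetical helper, following the
AddColumnsFlattenSuite pattern):

    import org.apache.spark.sql.DataFrame

    def nodeCount(df: DataFrame): Int =
      df.queryExecution.logical.collect { case n => n }.size

    val testDf = spark.range(10).select($"id" as "a", $"id" as "b")
    val flattened = testDf.withColumn("c", $"a" + $"b")
    // Expectation when flattening applies: no extra Project node, so the
    // logical-plan node count is unchanged by withColumn.
    assert(nodeCount(flattened) == nodeCount(testDf))
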
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../apache/spark/sql/internal/EasilyFlattenable.scala | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index c01e76c4d5cc..f99b9ba583fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -40,6 +40,9 @@ private[sql] object EasilyFlattenable { logicalPlan match { case p @ Project(projList, child: LogicalPlan) => + val currentOutputAttribs = AttributeSet(p.output) + val ambiguiousAttribs = p.output.groupBy(_.name).filter(_._2.size > 1).keySet + val currentDatasetIdOpt = p.getTagValue(Dataset.DATASET_ID_TAG).get.toSet.headOption val childDatasetIdOpt = child.getTagValue(Dataset.DATASET_ID_TAG).flatMap( _.toSet.headOption) @@ -49,7 +52,7 @@ private[sql] object EasilyFlattenable { case _: Attribute => true case _ => false } - val currentOutputAttribs = AttributeSet(p.output) + val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall( attribute => currentOutputAttribs.contains(attribute) || currentOutputAttribs.exists(_.name == attribute.name)) @@ -79,7 +82,8 @@ private[sql] object EasilyFlattenable { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 => u + case u: UnresolvedAttribute if u.nameParts.size != 1 | + ambiguiousAttribs.contains(u.name) => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -156,7 +160,8 @@ private[sql] object EasilyFlattenable { ne } - case ua: UnresolvedAttribute => projList.find( + case ua: UnresolvedAttribute if ua.nameParts.size == 1 & + !ambiguiousAttribs.contains(ua.name) => projList.find( _.toAttribute.name.equalsIgnoreCase(ua.name)). getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua")) From 7455648ddcd4418b0a8699dcab34f39aa6fe46a8 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 14 Dec 2023 22:55:42 -0800 Subject: [PATCH 076/129] SPARK-45959. added new tests. 
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../spark/sql/internal/EasilyFlattenable.scala | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index f99b9ba583fb..9aa33a55afbe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.internal +import scala.collection.mutable import scala.util.{Failure, Success, Try} import org.apache.spark.sql.{Dataset, RuntimeConfig} @@ -27,7 +28,6 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.types.MetadataBuilder - private[sql] object EasilyFlattenable { object OpType extends Enumeration { type OpType = Value @@ -140,7 +140,14 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => - Option(p.copy(projectList = remappedNewProjList)) + val newProj = p.copy(projectList = remappedNewProjList) + if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { + val dsIds = p.getTagValue(Dataset.DATASET_ID_TAG).map(_.clone()).getOrElse ( + new mutable.HashSet[Long]) + + newProj.setTagValue(Dataset.DATASET_ID_TAG, dsIds) + } + Option(newProj) case Failure(_) => None } @@ -187,8 +194,13 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => - val newProj = p.copy(projectList = remappedNewProjList) + if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { + val dsIds = p.getTagValue(Dataset.DATASET_ID_TAG).map(_.clone()).getOrElse( + new mutable.HashSet[Long]) + + newProj.setTagValue(Dataset.DATASET_ID_TAG, dsIds) + } Option(newProj) From dd32237e44dd62e14a7fa0440f9f2e2ecc63bb8a Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 00:30:00 -0800 Subject: [PATCH 077/129] SPARK-45959. added new tests. 
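The CacheManager changes earlier in this series pair with this: a flattened Project
should still be answered from a cached InMemoryRelation built for the pre-flattening
plan. Rough sketch (hypothetical, mirroring the suite's cached tests):

    val cachedDf = spark.range(10).select($"id" as "a", $"id" as "b")
    cachedDf.cache()
    val derived = cachedDf.withColumn("c", $"a" + 1)
    // Expectation: the optimized plan of `derived` scans the InMemoryRelation
    // rather than recomputing the Range source.
    derived.queryExecution.optimizedPlan.collectLeaves()
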
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../sql/internal/EasilyFlattenable.scala | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index 9aa33a55afbe..f3a8ee122aae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -44,8 +44,7 @@ private[sql] object EasilyFlattenable { val ambiguiousAttribs = p.output.groupBy(_.name).filter(_._2.size > 1).keySet val currentDatasetIdOpt = p.getTagValue(Dataset.DATASET_ID_TAG).get.toSet.headOption - val childDatasetIdOpt = child.getTagValue(Dataset.DATASET_ID_TAG).flatMap( - _.toSet.headOption) + // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { @@ -98,7 +97,7 @@ private[sql] object EasilyFlattenable { if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && currentDatasetIdOpt.contains(attr.metadata.getLong( Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) } else { ne } @@ -120,7 +119,7 @@ private[sql] object EasilyFlattenable { if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && currentDatasetIdOpt.contains(attr.metadata.getLong( Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) } else { ne } @@ -140,14 +139,15 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => - val newProj = p.copy(projectList = remappedNewProjList) - if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { - val dsIds = p.getTagValue(Dataset.DATASET_ID_TAG).map(_.clone()).getOrElse ( - new mutable.HashSet[Long]) - - newProj.setTagValue(Dataset.DATASET_ID_TAG, dsIds) - } - Option(newProj) + currentDatasetIdOpt.foreach(id => { + if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { + val dsIds = child.getTagValue(Dataset.DATASET_ID_TAG).getOrElse( + new mutable.HashSet[Long]) + dsIds.add(id) + child.setTagValue(Dataset.DATASET_ID_TAG, dsIds) + } + }) + Option(p.copy(projectList = remappedNewProjList)) case Failure(_) => None } @@ -162,7 +162,7 @@ private[sql] object EasilyFlattenable { if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && currentDatasetIdOpt.contains(attr.metadata.getLong( Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) } else { ne } @@ -184,7 +184,7 @@ private[sql] object EasilyFlattenable { if (ar.metadata.contains(Dataset.DATASET_ID_KEY) && currentDatasetIdOpt.contains(ar.metadata.getLong( Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, childDatasetIdOpt) + addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) } else { ne } @@ -194,13 +194,15 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => + currentDatasetIdOpt.foreach(id => { + if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { + val dsIds = child.getTagValue(Dataset.DATASET_ID_TAG).getOrElse( + new mutable.HashSet[Long]) + dsIds.add(id) + 
child.setTagValue(Dataset.DATASET_ID_TAG, dsIds) + } + }) val newProj = p.copy(projectList = remappedNewProjList) - if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { - val dsIds = p.getTagValue(Dataset.DATASET_ID_TAG).map(_.clone()).getOrElse( - new mutable.HashSet[Long]) - - newProj.setTagValue(Dataset.DATASET_ID_TAG, dsIds) - } Option(newProj) From a1f56e0e4fad1b6b9f6cc15f5066a8d406b61e3a Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 01:53:01 -0800 Subject: [PATCH 078/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../org/apache/spark/sql/internal/EasilyFlattenable.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index f3a8ee122aae..caf69d7dd6b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -147,7 +147,9 @@ private[sql] object EasilyFlattenable { child.setTagValue(Dataset.DATASET_ID_TAG, dsIds) } }) - Option(p.copy(projectList = remappedNewProjList)) + val newProj = Project(remappedNewProjList, child) + + Option(newProj) case Failure(_) => None } @@ -202,7 +204,7 @@ private[sql] object EasilyFlattenable { child.setTagValue(Dataset.DATASET_ID_TAG, dsIds) } }) - val newProj = p.copy(projectList = remappedNewProjList) + val newProj = Project(remappedNewProjList, child) Option(newProj) From 8f7a9bf1559044524f504992ba7adf6a99cd3a1c Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 07:45:34 -0800 Subject: [PATCH 079/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../spark/sql/internal/EasilyFlattenable.scala | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index caf69d7dd6b9..c36ffd5af9ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -39,9 +39,9 @@ private[sql] object EasilyFlattenable { val (logicalPlan, newProjList, conf) = tuple logicalPlan match { - case p @ Project(projList, child: LogicalPlan) => + case p @ Project(projList, child: LogicalPlan) if p.output.groupBy(_.name). + forall(_._2.size == 1) => val currentOutputAttribs = AttributeSet(p.output) - val ambiguiousAttribs = p.output.groupBy(_.name).filter(_._2.size > 1).keySet val currentDatasetIdOpt = p.getTagValue(Dataset.DATASET_ID_TAG).get.toSet.headOption @@ -81,8 +81,7 @@ private[sql] object EasilyFlattenable { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 | - ambiguiousAttribs.contains(u.name) => u + case u: UnresolvedAttribute if u.nameParts.size != 1 => u case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => u }.nonEmpty)) { @@ -169,10 +168,9 @@ private[sql] object EasilyFlattenable { ne } - case ua: UnresolvedAttribute if ua.nameParts.size == 1 & - !ambiguiousAttribs.contains(ua.name) => projList.find( - _.toAttribute.name.equalsIgnoreCase(ua.name)). 
- getOrElse(throw new UnsupportedOperationException("Not able to flatten" + + case ua: UnresolvedAttribute if ua.nameParts.size == 1 => + projList.find( _.toAttribute.name.equalsIgnoreCase(ua.name)). + getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua")) case al@Alias(ar: AttributeReference, name) => From 9c59adc2e2024ccc7a02228487c9d278337ae007 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 10:58:57 -0800 Subject: [PATCH 080/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../scala/org/apache/spark/sql/Dataset.scala | 4 +- .../sql/internal/EasilyFlattenable.scala | 142 +++++------------- 2 files changed, 39 insertions(+), 107 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index ef7ea3ff7402..c4c30a55381b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1574,7 +1574,7 @@ class Dataset[T] private[sql]( case other => other } val newProjList = untypedCols.map(_.named) - (logicalPlan, newProjList, sparkSession.conf) match { + (logicalPlan, newProjList, id) match { case EasilyFlattenable(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan @@ -2956,7 +2956,7 @@ class Dataset[T] private[sql]( projectList.map(_.name), sparkSession.sessionState.conf.caseSensitiveAnalysis) withPlan( - (logicalPlan, projectList, sparkSession.conf) match { + (logicalPlan, projectList, id) match { case EasilyFlattenable(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala index c36ffd5af9ae..40a2279b6053 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala @@ -17,15 +17,14 @@ package org.apache.spark.sql.internal -import scala.collection.mutable import scala.util.{Failure, Success, Try} -import org.apache.spark.sql.{Dataset, RuntimeConfig} +import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, NamedExpression, UserDefinedExpression, WindowExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} -import org.apache.spark.sql.types.MetadataBuilder + private[sql] object EasilyFlattenable { @@ -34,17 +33,19 @@ private[sql] object EasilyFlattenable { val AddNewColumnsOnly, RemapOnly, Unknown = Value } - def unapply(tuple: (LogicalPlan, Seq[NamedExpression], RuntimeConfig)): Option[LogicalPlan] + def unapply(tuple: (LogicalPlan, Seq[NamedExpression], Long)): Option[LogicalPlan] = { - val (logicalPlan, newProjList, conf) = tuple + val (logicalPlan, newProjList, did) = tuple logicalPlan match { - case p @ 
Project(projList, child: LogicalPlan) if p.output.groupBy(_.name). - forall(_._2.size == 1) => + case p @ Project(projList, child: LogicalPlan) + if newProjList.flatMap(_.collectLeaves()).forall { + case ar: AttributeReference if ar.metadata.contains(Dataset.DATASET_ID_KEY) && + ar.metadata.getLong(Dataset.DATASET_ID_KEY) != did => false + case _ => true + } => val currentOutputAttribs = AttributeSet(p.output) - val currentDatasetIdOpt = p.getTagValue(Dataset.DATASET_ID_TAG).get.toSet.headOption - // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { @@ -62,15 +63,15 @@ private[sql] object EasilyFlattenable { // case of new columns being added only val childOutput = child.output.map(_.name).toSet val attribsRemappedInProj = projList.flatMap(ne => ne match { - case _: AttributeReference => Seq.empty[(String, Alias)] + case _: AttributeReference => Seq.empty[(String, Expression)] - case al @ Alias(_, name) => if (childOutput.contains(name)) { - Seq(name -> al) + case Alias(expr, name) => if (childOutput.contains(name)) { + Seq(name -> expr) } else { - Seq.empty[(String, Alias)] + Seq.empty[(String, Expression)] } - case _ => Seq.empty[(String, Alias)] + case _ => Seq.empty[(String, Expression)] }).toMap if (tinkeredOrNewNamedExprs.exists(_.collectFirst { @@ -90,46 +91,30 @@ private[sql] object EasilyFlattenable { val remappedNewProjListResult = Try { newProjList.map { - case attr: AttributeReference => - val ne = projList.find( + case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && - currentDatasetIdOpt.contains(attr.metadata.getLong( - Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) - } else { - ne - } case ua: UnresolvedAttribute => - projList.find(_.toAttribute.name.equalsIgnoreCase(ua.name)). + projList.find(_.toAttribute.name.equals(ua.name)). 
getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua")) case anyOtherExpr => (anyOtherExpr transformUp { - case attr: AttributeReference => val ne = - attribsRemappedInProj.get(attr.name).orElse( - projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - case al: Alias => al - case x => x - }).getOrElse(attr) - if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && - currentDatasetIdOpt.contains(attr.metadata.getLong( - Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) - } else { - ne - } + case attr: AttributeReference => + attribsRemappedInProj.get(attr.name).orElse(projList.find( + _.toAttribute.canonicalized == attr.canonicalized).map { + case al: Alias => al.child + case x => x + }).getOrElse(attr) case u: UnresolvedAttribute => attribsRemappedInProj.get(u.name).orElse( - projList.find( _.toAttribute.name.equalsIgnoreCase(u.name)).map { - case al: Alias => al.child - case u: UnresolvedAttribute => - throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $u") - case x => x + projList.find( _.toAttribute.name.equals(u.name)).map { + case al: Alias => al.child + case u: UnresolvedAttribute => + throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $u") + case x => x }).getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $u")) }).asInstanceOf[NamedExpression] @@ -138,17 +123,7 @@ private[sql] object EasilyFlattenable { } remappedNewProjListResult match { case Success(remappedNewProjList) => - currentDatasetIdOpt.foreach(id => { - if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { - val dsIds = child.getTagValue(Dataset.DATASET_ID_TAG).getOrElse( - new mutable.HashSet[Long]) - dsIds.add(id) - child.setTagValue(Dataset.DATASET_ID_TAG, dsIds) - } - }) - val newProj = Project(remappedNewProjList, child) - - Option(newProj) + Option(Project(remappedNewProjList, child)) case Failure(_) => None } @@ -158,53 +133,31 @@ private[sql] object EasilyFlattenable { // case of renaming of columns val remappedNewProjListResult = Try { newProjList.map { - case attr: AttributeReference => val ne = projList.find( + case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - if (attr.metadata.contains(Dataset.DATASET_ID_KEY) && - currentDatasetIdOpt.contains(attr.metadata.getLong( - Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) - } else { - ne - } + case ua: UnresolvedAttribute if ua.nameParts.size == 1 => - projList.find( _.toAttribute.name.equalsIgnoreCase(ua.name)). + projList.find( _.toAttribute.name.equals(ua.name)). 
getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $ua")) case al@Alias(ar: AttributeReference, name) => - val ne = projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { + projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { case alx : Alias => Alias(alx.child, name)(al.exprId, al.qualifier, al.explicitMetadata, al.nonInheritableMetadataKeys) case _: AttributeReference => al }.get - if (ar.metadata.contains(Dataset.DATASET_ID_KEY) && - currentDatasetIdOpt.contains(ar.metadata.getLong( - Dataset.DATASET_ID_KEY))) { - addDataFrameIdToCol(conf, ne, child, currentDatasetIdOpt) - } else { - ne - } + case x => throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $x") } } remappedNewProjListResult match { case Success(remappedNewProjList) => - currentDatasetIdOpt.foreach(id => { - if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { - val dsIds = child.getTagValue(Dataset.DATASET_ID_TAG).getOrElse( - new mutable.HashSet[Long]) - dsIds.add(id) - child.setTagValue(Dataset.DATASET_ID_TAG, dsIds) - } - }) - val newProj = Project(remappedNewProjList, child) - - Option(newProj) + Option(Project(remappedNewProjList, child)) case Failure(_) => None } @@ -236,25 +189,4 @@ private[sql] object EasilyFlattenable { OpType.Unknown } } - - private def addDataFrameIdToCol( - conf: RuntimeConfig, - expr: NamedExpression, - logicalPlan: LogicalPlan, - childDatasetId: Option[Long]): NamedExpression = - if (conf.get(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) && childDatasetId.nonEmpty) { - val newExpr = expr transform { - case a: AttributeReference - => - val metadata = new MetadataBuilder() - .withMetadata(a.metadata) - .putLong(Dataset.DATASET_ID_KEY, childDatasetId.get) - .putLong(Dataset.COL_POS_KEY, logicalPlan.output.indexWhere(a.semanticEquals)) - .build() - a.withMetadata(metadata) - } - newExpr.asInstanceOf[NamedExpression] - } else { - expr - } } From 094681f9d9c987192806469d2357576b72ea3c0a Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 12:53:28 -0800 Subject: [PATCH 081/129] SPARK-45959. added new tests. 
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../scala/org/apache/spark/sql/Dataset.scala | 6 +- ...e.scala => EarlyCollapsableProjects.scala} | 55 +++++++++---------- 2 files changed, 28 insertions(+), 33 deletions(-) rename sql/core/src/main/scala/org/apache/spark/sql/internal/{EasilyFlattenable.scala => EarlyCollapsableProjects.scala} (82%) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index c4c30a55381b..28e4bc1da459 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -59,7 +59,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation, FileTable} import org.apache.spark.sql.execution.python.EvaluatePython import org.apache.spark.sql.execution.stat.StatFunctions -import org.apache.spark.sql.internal.{EasilyFlattenable, SQLConf} +import org.apache.spark.sql.internal.{EarlyCollapsableProjects, SQLConf} import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -1575,7 +1575,7 @@ class Dataset[T] private[sql]( } val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList, id) match { - case EasilyFlattenable(flattendPlan) if !this.isStreaming && + case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan case _ => Project(newProjList, logicalPlan) @@ -2957,7 +2957,7 @@ class Dataset[T] private[sql]( sparkSession.sessionState.conf.caseSensitiveAnalysis) withPlan( (logicalPlan, projectList, id) match { - case EasilyFlattenable(flattendPlan) if !this.isStreaming && + case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan case _ => Project(projectList, logicalPlan) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapsableProjects.scala similarity index 82% rename from sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala rename to sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapsableProjects.scala index 40a2279b6053..39791a605557 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EasilyFlattenable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapsableProjects.scala @@ -26,19 +26,17 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} - -private[sql] object EasilyFlattenable { +private[sql] object EarlyCollapsableProjects { object OpType extends Enumeration { type OpType = Value val AddNewColumnsOnly, RemapOnly, Unknown = Value } - def unapply(tuple: (LogicalPlan, Seq[NamedExpression], Long)): Option[LogicalPlan] - = { + def unapply(tuple: (LogicalPlan, Seq[NamedExpression], Long)): Option[LogicalPlan] = { val (logicalPlan, newProjList, did) = tuple logicalPlan match { - case p @ Project(projList, child: LogicalPlan) + case p@Project(projList, child: LogicalPlan) if newProjList.flatMap(_.collectLeaves()).forall { case ar: AttributeReference if ar.metadata.contains(Dataset.DATASET_ID_KEY) && 
ar.metadata.getLong(Dataset.DATASET_ID_KEY) != did => false @@ -53,7 +51,7 @@ private[sql] object EasilyFlattenable { case _ => false } - val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall( attribute => + val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall(attribute => currentOutputAttribs.contains(attribute) || currentOutputAttribs.exists(_.name == attribute.name)) val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, @@ -90,35 +88,34 @@ private[sql] object EasilyFlattenable { } else { val remappedNewProjListResult = Try { newProjList.map { - case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - case ua: UnresolvedAttribute => - projList.find(_.toAttribute.name.equals(ua.name)). + case ua: UnresolvedAttribute => projList.find(_.toAttribute.name.equals(ua.name)). getOrElse(throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $ua")) + s" unresolved attribute $ua")) case anyOtherExpr => (anyOtherExpr transformUp { case attr: AttributeReference => attribsRemappedInProj.get(attr.name).orElse(projList.find( _.toAttribute.canonicalized == attr.canonicalized).map { - case al: Alias => al.child - case x => x - }).getOrElse(attr) + case al: Alias => al.child + case x => x + }).getOrElse(attr) case u: UnresolvedAttribute => attribsRemappedInProj.get(u.name).orElse( - projList.find( _.toAttribute.name.equals(u.name)).map { - case al: Alias => al.child - case u: UnresolvedAttribute => - throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $u") - case x => x - }).getOrElse(throw new UnsupportedOperationException("Not able to flatten" + + projList.find(_.toAttribute.name.equals(u.name)).map { + case al: Alias => al.child + + case u: UnresolvedAttribute => + throw new UnsupportedOperationException("Not able to flatten" + + s" unresolved attribute $u") + + case x => x + }).getOrElse(throw new UnsupportedOperationException("Not able to flatten" + s" unresolved attribute $u")) }).asInstanceOf[NamedExpression] - } } remappedNewProjListResult match { @@ -136,16 +133,15 @@ private[sql] object EasilyFlattenable { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case ua: UnresolvedAttribute if ua.nameParts.size == 1 => - projList.find( _.toAttribute.name.equals(ua.name)). + projList.find(_.toAttribute.name.equals(ua.name)). 
getOrElse(throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $ua")) + s" unresolved attribute $ua")) case al@Alias(ar: AttributeReference, name) => - projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { + projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { - case alx : Alias => Alias(alx.child, name)(al.exprId, al.qualifier, + case alx: Alias => Alias(alx.child, name)(al.exprId, al.qualifier, al.explicitMetadata, al.nonInheritableMetadataKeys) case _: AttributeReference => al @@ -173,17 +169,16 @@ private[sql] object EasilyFlattenable { passThruAttribs: Seq[NamedExpression], currentOutputAttribs: AttributeSet, tinkeredOrNewNamedExprs: Seq[NamedExpression], - passThruAttribsContainedInCurrentOutput: Boolean - ): OpType.OpType = { + passThruAttribsContainedInCurrentOutput: Boolean): OpType.OpType = { if (passThruAttribs.size == currentOutputAttribs.size && - passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.nonEmpty) { + passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.nonEmpty) { OpType.AddNewColumnsOnly } else if (passThruAttribs.size + tinkeredOrNewNamedExprs.size == currentOutputAttribs.size && passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.forall { case Alias(_: AttributeReference, _) => true case _ => false - }) { + }) { OpType.RemapOnly } else { OpType.Unknown From 3f20dd23daa0b88d926515267fc930486f77dc26 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 12:55:06 -0800 Subject: [PATCH 082/129] SPARK-45959. added new tests. Handled flattening of Project when done using dataFrame.select instead of withColumn api --- .../spark/sql/connect/planner/SparkConnectPlanner.scala | 6 +++--- .../spark/sql/catalyst/plans/logical/LogicalPlan.scala | 2 +- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4 ++-- .../scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 7cb6b5ac7ddc..ee7aec41c64d 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -986,12 +986,12 @@ class SparkConnectPlanner( (rename.getColName, rename.getNewColName) }.unzip val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue(LogicalPlan.SKIP_FLATTENING, true) + ds.logicalPlan.setTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) ds.withColumnsRenamed(colNames, newColNames).logicalPlan } else { // for backward compatibility val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue(LogicalPlan.SKIP_FLATTENING, true) + ds.logicalPlan.setTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) ds.withColumnsRenamed(rel.getRenameColumnsMapMap).logicalPlan } } @@ -1014,7 +1014,7 @@ class SparkConnectPlanner( }.unzip3 val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue(LogicalPlan.SKIP_FLATTENING, true) + ds.logicalPlan.setTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) ds.withColumns(colNames, cols, metadata).logicalPlan } diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 9726a031e24b..26f19cb164c5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -198,7 +198,7 @@ object LogicalPlan { // to the old code path. private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id") private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col") - private[spark] val SKIP_FLATTENING = TreeNodeTag[Boolean]("skipFlattening") + private[spark] val SKIP_EARLY_PROJECT_COLLAPSE = TreeNodeTag[Boolean]("skipEarlyProjectCollapse") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 28e4bc1da459..c53ded42f012 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1576,7 +1576,7 @@ class Dataset[T] private[sql]( val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList, id) match { case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && - !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan + !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => flattendPlan case _ => Project(newProjList, logicalPlan) } @@ -2958,7 +2958,7 @@ class Dataset[T] private[sql]( withPlan( (logicalPlan, projectList, id) match { case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && - !logicalPlan.getTagValue(LogicalPlan.SKIP_FLATTENING).getOrElse(false) => flattendPlan + !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => flattendPlan case _ => Project(projectList, logicalPlan) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala index 474e0136ef90..fd25d657779f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala @@ -244,10 +244,10 @@ class AddColumnsFlattenSuite extends QueryTest // then obtain optimized transformation which adds new project val logicalPlan = baseDf.logicalPlan val newDfUnopt = try { - logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_FLATTENING, true) + logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) transformation(baseDf) } finally { - logicalPlan.unsetTagValue(LogicalPlan.SKIP_FLATTENING) + logicalPlan.unsetTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE) } (newDfOpt, newDfUnopt) } From 4f6d44705e28847bd4351b8162c212459e002692 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 12:57:17 -0800 Subject: [PATCH 083/129] SPARK-45959. added new tests. 
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 6 ++++-- ...nsFlattenSuite.scala => EarlyProjectCollapseSuite.scala} | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) rename sql/core/src/test/scala/org/apache/spark/sql/{AddColumnsFlattenSuite.scala => EarlyProjectCollapseSuite.scala} (99%) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index c53ded42f012..2d127b0a2322 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1576,7 +1576,8 @@ class Dataset[T] private[sql]( val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList, id) match { case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && - !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => flattendPlan + !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => + flattendPlan case _ => Project(newProjList, logicalPlan) } @@ -2958,7 +2959,8 @@ class Dataset[T] private[sql]( withPlan( (logicalPlan, projectList, id) match { case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && - !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => flattendPlan + !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => + flattendPlan case _ => Project(projectList, logicalPlan) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyProjectCollapseSuite.scala similarity index 99% rename from sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/EarlyProjectCollapseSuite.scala index fd25d657779f..eff53fd9d54f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AddColumnsFlattenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyProjectCollapseSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.functions.col import org.apache.spark.sql.test.SharedSparkSession -class AddColumnsFlattenSuite extends QueryTest +class EarlyProjectCollapseSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ From 9a316c1867680ae4487c7cf7881b08961b366f75 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 13:00:53 -0800 Subject: [PATCH 084/129] SPARK-45959. added new tests. 
Handled flattening of Project when done using dataFrame.select instead of withColumn api --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 6 +++--- ...CollapsableProjects.scala => EarlyCollapseProject.scala} | 2 +- ...tCollapseSuite.scala => EarlyCollapseProjectSuite.scala} | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename sql/core/src/main/scala/org/apache/spark/sql/internal/{EarlyCollapsableProjects.scala => EarlyCollapseProject.scala} (99%) rename sql/core/src/test/scala/org/apache/spark/sql/{EarlyProjectCollapseSuite.scala => EarlyCollapseProjectSuite.scala} (99%) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 2d127b0a2322..3a13821dc696 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -59,7 +59,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation, FileTable} import org.apache.spark.sql.execution.python.EvaluatePython import org.apache.spark.sql.execution.stat.StatFunctions -import org.apache.spark.sql.internal.{EarlyCollapsableProjects, SQLConf} +import org.apache.spark.sql.internal.{EarlyCollapseProject, SQLConf} import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -1575,7 +1575,7 @@ class Dataset[T] private[sql]( } val newProjList = untypedCols.map(_.named) (logicalPlan, newProjList, id) match { - case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && + case EarlyCollapseProject(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => flattendPlan @@ -2958,7 +2958,7 @@ class Dataset[T] private[sql]( sparkSession.sessionState.conf.caseSensitiveAnalysis) withPlan( (logicalPlan, projectList, id) match { - case EarlyCollapsableProjects(flattendPlan) if !this.isStreaming && + case EarlyCollapseProject(flattendPlan) if !this.isStreaming && !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) => flattendPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapsableProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala similarity index 99% rename from sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapsableProjects.scala rename to sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 39791a605557..2f18330a7b1f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapsableProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} -private[sql] object EarlyCollapsableProjects { +private[sql] object EarlyCollapseProject { object OpType extends Enumeration { type OpType = Value val AddNewColumnsOnly, RemapOnly, Unknown = Value diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyProjectCollapseSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala similarity index 99% rename from sql/core/src/test/scala/org/apache/spark/sql/EarlyProjectCollapseSuite.scala rename to 
sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala
index eff53fd9d54f..d6ff8b918725 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyProjectCollapseSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.test.SharedSparkSession
 
 
-class EarlyProjectCollapseSuite extends QueryTest
+class EarlyCollapseProjectSuite extends QueryTest
   with SharedSparkSession
   with AdaptiveSparkPlanHelper {
   import testImplicits._
 
From cfb2b04f821e80795ae8a6f518bf160cb09eef8d Mon Sep 17 00:00:00 2001
From: ashahid
Date: Fri, 15 Dec 2023 14:10:43 -0800
Subject: [PATCH 085/129] SPARK-45959. Reworked and simplified the code. Instead of collapsing before analysis, now collapsing after analysis.

---
 .../scala/org/apache/spark/sql/Dataset.scala  | 19 ++------
 .../spark/sql/execution/QueryExecution.scala  |  8 +++-
 .../sql/internal/EarlyCollapseProject.scala   | 48 +++----------------
 .../spark/sql/EarlyCollapseProjectSuite.scala |  2 +-
 4 files changed, 17 insertions(+), 60 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 3a13821dc696..d15e6716c1ad 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -59,7 +59,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation, FileTable}
 import org.apache.spark.sql.execution.python.EvaluatePython
 import org.apache.spark.sql.execution.stat.StatFunctions
-import org.apache.spark.sql.internal.{EarlyCollapseProject, SQLConf}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.streaming.DataStreamWriter
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.SchemaUtils
@@ -1573,14 +1573,7 @@ class Dataset[T] private[sql](
 
       case other => other
     }
-    val newProjList = untypedCols.map(_.named)
-    (logicalPlan, newProjList, id) match {
-      case EarlyCollapseProject(flattendPlan) if !this.isStreaming &&
-        !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) =>
-        flattendPlan
-
-      case _ => Project(newProjList, logicalPlan)
-    }
+    Project(untypedCols.map(_.named), logicalPlan)
   }
 }
@@ -2957,13 +2950,7 @@ class Dataset[T] private[sql](
       projectList.map(_.name),
       sparkSession.sessionState.conf.caseSensitiveAnalysis)
     withPlan(
-      (logicalPlan, projectList, id) match {
-        case EarlyCollapseProject(flattendPlan) if !this.isStreaming &&
-          !logicalPlan.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) =>
-          flattendPlan
-
-        case _ => Project(projectList, logicalPlan)
-      }
+      Project(projectList, logicalPlan)
     )
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index eb5b38d42881..cb9b496e103e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -41,7 +41,7 @@ import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters
 import org.apache.spark.sql.execution.exchange.EnsureRequirements
 import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery
 import org.apache.spark.sql.execution.streaming.{IncrementalExecution,
OffsetSeqMetadata, WatermarkPropagator} -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{EarlyCollapseProject, SQLConf} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils @@ -85,7 +85,11 @@ class QueryExecution( lazy val analyzed: LogicalPlan = { val plan = executePhase(QueryPlanningTracker.ANALYSIS) { // We can't clone `logical` here, which will reset the `_analyzed` flag. - sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) + val analyzedPlan = sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) + analyzedPlan match { + case EarlyCollapseProject(collapsedPlan) => collapsedPlan + case _ => analyzedPlan + } } tracker.setAnalyzed(plan) plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 2f18330a7b1f..089d31e08b68 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -19,29 +19,23 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} -import org.apache.spark.sql.Dataset -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} - private[sql] object EarlyCollapseProject { object OpType extends Enumeration { type OpType = Value val AddNewColumnsOnly, RemapOnly, Unknown = Value } - def unapply(tuple: (LogicalPlan, Seq[NamedExpression], Long)): Option[LogicalPlan] = { - val (logicalPlan, newProjList, did) = tuple + def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = { + logicalPlan match { - case p@Project(projList, child: LogicalPlan) - if newProjList.flatMap(_.collectLeaves()).forall { - case ar: AttributeReference if ar.metadata.contains(Dataset.DATASET_ID_KEY) && - ar.metadata.getLong(Dataset.DATASET_ID_KEY) != did => false - case _ => true - } => + case Project(newProjList, p @ Project(projList, child)) if !p.getTagValue( + LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) + => val currentOutputAttribs = AttributeSet(p.output) // In the new column list identify those Named Expressions which are just attributes and @@ -80,9 +74,6 @@ private[sql] object EarlyCollapseProject { case ex: AggregateExpression => ex case ex: WindowExpression => ex case ex: UserDefinedExpression => ex - case u: UnresolvedAttribute if u.nameParts.size != 1 => u - case u: UnresolvedFunction if u.nameParts.size == 1 & u.nameParts.head == "struct" => - u }.nonEmpty)) { None } else { @@ -91,10 +82,6 @@ private[sql] object EarlyCollapseProject { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr) - case ua: UnresolvedAttribute => projList.find(_.toAttribute.name.equals(ua.name)). 
- getOrElse(throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $ua")) - case anyOtherExpr => (anyOtherExpr transformUp { case attr: AttributeReference => @@ -103,24 +90,11 @@ private[sql] object EarlyCollapseProject { case al: Alias => al.child case x => x }).getOrElse(attr) - - case u: UnresolvedAttribute => attribsRemappedInProj.get(u.name).orElse( - projList.find(_.toAttribute.name.equals(u.name)).map { - case al: Alias => al.child - - case u: UnresolvedAttribute => - throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $u") - - case x => x - }).getOrElse(throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $u")) }).asInstanceOf[NamedExpression] } } remappedNewProjListResult match { - case Success(remappedNewProjList) => - Option(Project(remappedNewProjList, child)) + case Success(remappedNewProjList) => Option(Project(remappedNewProjList, child)) case Failure(_) => None } @@ -133,10 +107,6 @@ private[sql] object EarlyCollapseProject { case attr: AttributeReference => projList.find( _.toAttribute.canonicalized == attr.canonicalized).get - case ua: UnresolvedAttribute if ua.nameParts.size == 1 => - projList.find(_.toAttribute.name.equals(ua.name)). - getOrElse(throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $ua")) case al@Alias(ar: AttributeReference, name) => projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { @@ -146,14 +116,10 @@ private[sql] object EarlyCollapseProject { case _: AttributeReference => al }.get - - case x => throw new UnsupportedOperationException("Not able to flatten" + - s" unresolved attribute $x") } } remappedNewProjListResult match { - case Success(remappedNewProjList) => - Option(Project(remappedNewProjList, child)) + case Success(remappedNewProjList) => Option(Project(remappedNewProjList, child)) case Failure(_) => None } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index d6ff8b918725..7349275535dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -252,7 +252,7 @@ class EarlyCollapseProjectSuite extends QueryTest (newDfOpt, newDfUnopt) } - private def collectNodes(df: DataFrame): Seq[LogicalPlan] = df.queryExecution.logical.collect { + private def collectNodes(df: DataFrame): Seq[LogicalPlan] = df.logicalPlan.collect { case l => l } } From 9e3bd44cbbd652ad9271c10cce099570609c17a6 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 15:59:00 -0800 Subject: [PATCH 086/129] SPARK-45959. reworked and simplified the code. 
Instead of Collapsing before analyse, now collapsing after analyze --- .../spark/sql/connect/planner/SparkConnectPlanner.scala | 3 --- .../spark/sql/catalyst/plans/logical/LogicalPlan.scala | 1 + .../apache/spark/sql/internal/EarlyCollapseProject.scala | 9 +++++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index ee7aec41c64d..ecee2ba383a1 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -986,12 +986,10 @@ class SparkConnectPlanner( (rename.getColName, rename.getNewColName) }.unzip val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) ds.withColumnsRenamed(colNames, newColNames).logicalPlan } else { // for backward compatibility val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) ds.withColumnsRenamed(rel.getRenameColumnsMapMap).logicalPlan } } @@ -1014,7 +1012,6 @@ class SparkConnectPlanner( }.unzip3 val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.logicalPlan.setTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) ds.withColumns(colNames, cols, metadata).logicalPlan } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 26f19cb164c5..7daea8197432 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -198,6 +198,7 @@ object LogicalPlan { // to the old code path. 
private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id") private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col") + // For Testing private[spark] val SKIP_EARLY_PROJECT_COLLAPSE = TreeNodeTag[Boolean]("skipEarlyProjectCollapse") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 089d31e08b68..42f140348e86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -30,11 +30,12 @@ private[sql] object EarlyCollapseProject { } def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = { - - logicalPlan match { - case Project(newProjList, p @ Project(projList, child)) if !p.getTagValue( - LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) + case newP @ Project(newProjList, p @ Project(projList, child)) if + !p.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) && + !newP.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) && + p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty => val currentOutputAttribs = AttributeSet(p.output) From bbea2c489bec96948be4978d443145c7814da8d7 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 20:05:26 -0800 Subject: [PATCH 087/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../sql/internal/EarlyCollapseProject.scala | 1 + .../analyzer-results/natural-join.sql.out | 269 +++++---- .../sql-tests/analyzer-results/pivot.sql.out | 536 +++++++++--------- .../analyzer-results/postgreSQL/join.sql.out | 370 ++++++------ .../udf/postgreSQL/udf-join.sql.out | 219 ++++--- .../analyzer-results/udf/udf-pivot.sql.out | 536 +++++++++--------- .../analyzer-results/using-join.sql.out | 274 +++++---- 7 files changed, 1063 insertions(+), 1142 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 42f140348e86..e12e99f7c089 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -36,6 +36,7 @@ private[sql] object EarlyCollapseProject { !newP.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) && p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty + && newProjList.size >= projList.size => val currentOutputAttribs = AttributeSet(p.output) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out index 41c8876a7d25..53c418a3f85a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out @@ -71,20 +71,19 @@ CreateViewCommand `nt4`, select * from values SELECT * FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation 
[k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -299,40 +298,38 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT *, nt2.k FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -510,56 +507,54 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException SELECT * FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, v2#x, v3#x] -+- Project [k#x, v1#x, v2#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project 
[cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.* FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query @@ -620,61 +615,59 @@ Project [k#x, v1#x, v2#x, k#x, v3#x] SELECT * FROM nt1 natural join nt2 join nt3 on nt2.k = nt3.k -- !query analysis Project [k#x, v1#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, v2#x, k#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join 
Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.*, nt4.* FROM nt1 natural join nt2 natural join nt3 natural join nt4 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x, k#x, v4#x] -+- Project [k#x, v1#x, v2#x, v3#x, v4#x, k#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- Project [k#x, v1#x, v2#x, k#x] - : : +- Join Inner, (k#x = k#x) - : : :- SubqueryAlias nt1 - : : : +- View (`nt1`, [k#x,v1#x]) - : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : : +- Project [k#x, v1#x] - : : : +- SubqueryAlias nt1 - : : : +- LocalRelation [k#x, v1#x] - : : +- SubqueryAlias nt2 - : : +- View (`nt2`, [k#x,v2#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : : +- Project [k#x, v2#x] - : : +- SubqueryAlias nt2 - : : +- LocalRelation [k#x, v2#x] - : +- SubqueryAlias nt3 - : +- View (`nt3`, [k#x,v3#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - : +- Project [k#x, v3#x] - : +- SubqueryAlias nt3 - : +- LocalRelation [k#x, v3#x] - +- SubqueryAlias nt4 - +- View (`nt4`, [k#x,v4#x]) - +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] - +- Project [k#x, v4#x] - +- SubqueryAlias nt4 - +- LocalRelation [k#x, v4#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- Project [k#x, v1#x, v2#x, k#x] + : : +- Join Inner, (k#x = k#x) + : : :- SubqueryAlias nt1 + : : : +- View (`nt1`, [k#x,v1#x]) + : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : : +- Project [k#x, v1#x] + : : : +- SubqueryAlias nt1 + : : : +- LocalRelation [k#x, v1#x] + : : +- SubqueryAlias nt2 + : : +- View (`nt2`, [k#x,v2#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : : +- Project [k#x, v2#x] + : : +- SubqueryAlias nt2 + : : +- LocalRelation [k#x, v2#x] + : +- SubqueryAlias nt3 + : +- View (`nt3`, [k#x,v3#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + : +- Project [k#x, v3#x] + : +- SubqueryAlias nt3 + : +- LocalRelation [k#x, v3#x] + +- SubqueryAlias nt4 + +- View (`nt4`, [k#x,v4#x]) + +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] + +- Project [k#x, v4#x] + +- SubqueryAlias nt4 + +- LocalRelation [k#x, v4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index ead14bdd882d..23df9320925d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS 
`sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) 
AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_avg(earnings)#x, Java_sum(earnings)#xL, Java_avg(earnings)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- 
LocalRelation [course#x, year#x, earnings#x] -- !query @@ -124,18 +121,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -147,18 +143,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_sum(earnings)#xL, dotNET_min(year)#x, Java_sum(earnings)#xL, Java_min(year)#x] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] - +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias 
coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -172,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] ++- Aggregate [course#x, 
year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -204,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_min(s)#x, Java_sum(earnings)#xL, Java_min(s)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] ++- Aggregate [year#x], [year#x, 
pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -236,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS 
__pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -266,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate 
[c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -289,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- 
Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -373,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_CEIL(sum(earnings))#xL, dotNET_a1#x, Java_CEIL(sum(earnings))#xL, Java_a1#x] -+- Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- 
Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -421,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) 
AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -453,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -610,25 +597,24 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, [1, 1]#xL, [2, 2]#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] - +- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] ++- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -642,25 +628,24 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - 
: +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -674,25 +659,24 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, {1, a}#xL, {2, b}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, 
a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -706,25 +690,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, 
sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -779,15 +762,14 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] -+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS 
x#x, d AS d#x, w AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index c66326e020eb..497f36ff5644 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -415,12 +415,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL INNER JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -428,12 +427,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -460,12 +458,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL NATURAL JOIN J2_TBL -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -473,16 +470,15 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS a#x, k#x AS d#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation 
spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS a#x, k#x AS d#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -490,16 +486,15 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS d#x, k#x AS a#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS d#x, k#x AS a#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -573,12 +568,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -586,12 +580,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -729,17 +722,16 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [name#x, n#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = 
name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -750,16 +742,15 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -770,16 +761,15 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -789,17 +779,16 @@ FULL JOIN (SELECT * FROM t3) s3 USING (name) -- !query analysis -Project [name#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -809,16 +798,15 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = 
name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -828,16 +816,15 @@ NATURAL LEFT JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -846,17 +833,16 @@ SELECT * FROM NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -868,22 +854,21 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- 
SubqueryAlias s1 + : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -894,23 +879,22 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -923,25 +907,24 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] 
parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -954,25 +937,24 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index e5f51cd80bf0..b0809ab8a9fc 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -733,17 +733,16 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [name#x, n#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] ++- Join FullOuter, 
(name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -754,16 +753,15 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -774,16 +772,15 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -813,16 +810,15 @@ NATURAL INNER JOIN (SELECT udf(name) as name, udf(udf(n)) as s3_n, udf(3) as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet 
++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -832,16 +828,15 @@ NATURAL LEFT JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -850,17 +845,16 @@ SELECT * FROM NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(udf(n)) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x 
as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -872,22 +866,21 @@ NATURAL INNER JOIN (SELECT udf(udf(udf(name))) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -898,23 +891,22 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, udf(3) as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as 
int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -958,25 +950,24 @@ NATURAL FULL JOIN (SELECT name, udf(n) as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation 
spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index 08d11e0ac448..e248692cf7ed 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [udf(year)#x, dotNET#xL, Java#xL] -+- Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] 
AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(avg(earnings))#x, Java_udf(sum(earnings))#xL, Java_udf(avg(earnings))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, 
[course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -124,18 +121,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS 
`CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -147,18 +143,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_udf(sum(udf(earnings)))#xL, dotNET_udf(min(year))#x, Java_udf(sum(udf(earnings)))#xL, Java_udf(min(year))#x] -+- Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, 
__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -172,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] ++- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, 
CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -204,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(min(s))#x, Java_udf(sum(earnings))#xL, Java_udf(min(s))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, 
__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -236,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, 
pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -266,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS 
__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -289,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS 
__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -373,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(CEIL(udf(sum(earnings))))#xL, dotNET_a1#x, Java_udf(CEIL(udf(sum(earnings))))#xL, Java_a1#x] -+- Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, 
earnings#x] +Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -421,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- 
Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -453,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project 
[year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -550,25 +537,24 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, [1, 1]#xL, [2, 2]#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] - +- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] ++- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS 
`CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -582,25 +568,24 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS 
__pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -614,25 +599,24 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, {1, a}#xL, {2, b}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x 
as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -646,25 +630,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias 
courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -720,15 +703,14 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] -+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] + +- 
SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 97410d3cdd36..5fa3d69d1f19 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -39,20 +39,19 @@ CreateViewCommand `nt2`, select * from values SELECT * FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -79,20 +78,19 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -223,20 +221,19 @@ Project [k#x, k#x] SELECT * FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project 
[cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -263,20 +260,19 @@ Project [k#x] SELECT nt1.* FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -303,40 +299,38 @@ Project [k#x] SELECT k, nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -363,20 +357,19 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 right outer join 
nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -485,21 +478,20 @@ Project [k#x, k#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -648,21 +640,20 @@ Project [k#x, k#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -689,20 +680,19 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : 
+- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x]
-      :        +- Project [k#x, v1#x]
-      :           +- SubqueryAlias nt1
-      :              +- LocalRelation [k#x, v1#x]
-      +- SubqueryAlias nt2
-         +- View (`nt2`, [k#x,v2#x])
-            +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x]
-               +- Project [k#x, v2#x]
-                  +- SubqueryAlias nt2
-                     +- LocalRelation [k#x, v2#x]
++- Join Inner, (k#x = k#x)
+   :- SubqueryAlias nt1
+   :  +- View (`nt1`, [k#x,v1#x])
+   :     +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x]
+   :        +- Project [k#x, v1#x]
+   :           +- SubqueryAlias nt1
+   :              +- LocalRelation [k#x, v1#x]
+   +- SubqueryAlias nt2
+      +- View (`nt2`, [k#x,v2#x])
+         +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x]
+            +- Project [k#x, v2#x]
+               +- SubqueryAlias nt2
+                  +- LocalRelation [k#x, v2#x]


-- !query

From 92144cd80955e02e44c630fda338b9f0e268b724 Mon Sep 17 00:00:00 2001
From: ashahid
Date: Thu, 15 Dec 2023 22:13:47 -0800
Subject: [PATCH 088/129] SPARK-45959. Reworked and simplified the code.
 Instead of collapsing before analysis, now collapsing after analysis.

---
 .../sql/internal/EarlyCollapseProject.scala   | 68 ++++++++++++++-----
 1 file changed, 50 insertions(+), 18 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala
index e12e99f7c089..99c535a93411 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala
@@ -19,9 +19,10 @@ package org.apache.spark.sql.internal

 import scala.util.{Failure, Success, Try}

-import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
+import org.apache.spark.sql.types.{Metadata, MetadataBuilder}

 private[sql] object EarlyCollapseProject {
   object OpType extends Enumeration {
@@ -55,18 +56,20 @@ private[sql] object EarlyCollapseProject {
     opType match {
       case OpType.AddNewColumnsOnly =>
         // case of new columns being added only
-        val childOutput = child.output.map(_.name).toSet
-        val attribsRemappedInProj = projList.flatMap(ne => ne match {
-          case _: AttributeReference => Seq.empty[(String, Expression)]
-
-          case Alias(expr, name) => if (childOutput.contains(name)) {
-            Seq(name -> expr)
-          } else {
-            Seq.empty[(String, Expression)]
-          }
+        val childOutput = child.outputSet
+        val attribsRemappedInProj = AttributeMap(
+          projList.flatMap(ne => ne match {
+            case _: AttributeReference => Seq.empty[(Attribute, Expression)]
+
+            case al @ Alias(expr, _) =>
+              if (childOutput.contains(al.toAttribute)) {
+                Seq(al.toAttribute -> expr)
+              } else {
+                Seq.empty[(Attribute, Expression)]
+              }

-          case _ => Seq.empty[(String, Expression)]
-        }).toMap
+            case _ => Seq.empty[(Attribute, Expression)]
+          }))

         if (tinkeredOrNewNamedExprs.exists(_.collectFirst {
           // we will not flatten if expressions contain windows or aggregate as if they
@@ -82,12 +85,13 @@ private[sql] object EarlyCollapseProject {
         val remappedNewProjListResult = Try {
           newProjList.map {
             case attr: AttributeReference => projList.find(
-              _.toAttribute.canonicalized == attr.canonicalized).getOrElse(attr)
+              _.toAttribute.canonicalized == attr.canonicalized).map(
+                transferMetadata(attr, _)).getOrElse(attr)

             case anyOtherExpr =>
               (anyOtherExpr transformUp {
                 case attr: AttributeReference =>
-                  attribsRemappedInProj.get(attr.name).orElse(projList.find(
+                  attribsRemappedInProj.get(attr).orElse(projList.find(
                     _.toAttribute.canonicalized == attr.canonicalized).map {
                       case al: Alias => al.child
                       case x => x
@@ -107,14 +111,24 @@ private[sql] object EarlyCollapseProject {
         val remappedNewProjListResult = Try {
           newProjList.map {
             case attr: AttributeReference => projList.find(
-              _.toAttribute.canonicalized == attr.canonicalized).get
-
+              _.toAttribute.canonicalized == attr.canonicalized).map {
+                // Handle the case where the column name's case was changed via toSchema
+                case al: Alias => if (attr.name == al.name) {
+                  transferMetadata(attr, al)
+                } else {
+                  transferMetadata(attr, al.copy(name = attr.name)(
+                    exprId = al.exprId, qualifier = al.qualifier,
+                    explicitMetadata = al.explicitMetadata,
+                    nonInheritableMetadataKeys = al.nonInheritableMetadataKeys))
+                }
+              }.get
             case al@Alias(ar: AttributeReference, name) =>
               projList.find(_.toAttribute.canonicalized == ar.canonicalized).map {
-                case alx: Alias => Alias(alx.child, name)(al.exprId, al.qualifier,
-                  al.explicitMetadata, al.nonInheritableMetadataKeys)
+                case alx: Alias => transferMetadata(al.toAttribute,
+                  Alias(alx.child, name)(al.exprId, al.qualifier,
+                    al.explicitMetadata, al.nonInheritableMetadataKeys))

                 case _: AttributeReference => al
               }.get
@@ -133,6 +147,24 @@ private[sql] object EarlyCollapseProject {
     }
   }

+  private def transferMetadata(from: Attribute, to: NamedExpression): NamedExpression =
+    if (from.metadata == Metadata.empty) {
+      to
+    } else {
+      to match {
+        case al: Alias =>
+          val newMdBuilder = new MetadataBuilder().withMetadata(from.metadata)
+          val newMd = newMdBuilder.build()
+
+          al.copy()(exprId = al.exprId, qualifier = al.qualifier,
+            nonInheritableMetadataKeys = al.nonInheritableMetadataKeys,
+            explicitMetadata = Option(newMd))
+
+        case attr: AttributeReference => attr.copy(metadata = from.metadata)(
+          exprId = attr.exprId, qualifier = from.qualifier)
+      }
+    }
+
   private def identifyOp(
       passThruAttribs: Seq[NamedExpression],
       currentOutputAttribs: AttributeSet,

From 2176a6ec4db53d62ef8b324ec8d6f13023500d92 Mon Sep 17 00:00:00 2001
From: ashahid
Date: Fri, 15 Dec 2023 22:17:11 -0800
Subject: [PATCH 089/129] SPARK-45959. Reworked and simplified the code.
 Instead of collapsing before analysis, now collapsing after analysis.
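
As a rough illustration of the plan rewrite this series targets (the snippet
below is illustrative only and is not part of the patch; `spark`, the column
names, and the commented plan shapes are assumptions):

    import org.apache.spark.sql.functions.col

    // Each withColumn would ordinarily stack one more Project node on top of
    // the plan, roughly:
    //   Project [id, a, (a * 2) AS b]
    //   +- Project [id, (id + 1) AS a]
    //      +- Range (0, 5, ...)
    val df = spark.range(5).toDF("id")
      .withColumn("a", col("id") + 1)
      .withColumn("b", col("a") * 2)

    // The collapse rewrites the upper Project to refer directly to the
    // child's output, so the chain flattens to a single Project:
    //   Project [id, (id + 1) AS a, ((id + 1) * 2) AS b]
    //   +- Range (0, 5, ...)

Because the collapse now runs on the analyzed plan rather than before
analysis, the analyzer golden files updated below revert to the nested,
un-collapsed Project form.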
---
 .../analyzer-results/natural-join.sql.out     | 269 ++++-----
 .../sql-tests/analyzer-results/pivot.sql.out  | 536 +++++++++---------
 .../analyzer-results/postgreSQL/join.sql.out  | 370 ++++++------
 .../udf/postgreSQL/udf-join.sql.out           | 219 +++----
 .../analyzer-results/udf/udf-pivot.sql.out    | 536 +++++++++---------
 .../analyzer-results/using-join.sql.out       | 274 ++++-----
 6 files changed, 1142 insertions(+), 1062 deletions(-)

diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out
index 53c418a3f85a..41c8876a7d25 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out
@@ -71,19 +71,20 @@ CreateViewCommand `nt4`, select * from values
 SELECT * FROM nt1 natural join nt2
 -- !query analysis
 Project [k#x, v1#x, v2#x]
-+- Join Inner, (k#x = k#x)
-   :- SubqueryAlias nt1
-   :  +- View (`nt1`, [k#x,v1#x])
-   :     +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x]
-   :        +- Project [k#x, v1#x]
-   :           +- SubqueryAlias nt1
-   :              +- LocalRelation [k#x, v1#x]
-   +- SubqueryAlias nt2
-      +- View (`nt2`, [k#x,v2#x])
-         +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x]
-            +- Project [k#x, v2#x]
-               +- SubqueryAlias nt2
-                  +- LocalRelation [k#x, v2#x]
++- Project [k#x, v1#x, v2#x]
+ +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -507,54 +510,56 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException SELECT * FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, v2#x, v3#x] -+- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Project [k#x, v1#x, v2#x, v3#x] + +- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.* FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x] -+- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] + +- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- 
LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query @@ -615,59 +620,61 @@ Project [k#x, v1#x, v2#x, k#x, v3#x] SELECT * FROM nt1 natural join nt2 join nt3 on nt2.k = nt3.k -- !query analysis Project [k#x, v1#x, v2#x, k#x, v3#x] -+- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Project [k#x, v1#x, v2#x, k#x, v3#x] + +- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.*, nt4.* FROM nt1 natural join nt2 natural join nt3 natural join nt4 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x, k#x, v4#x] -+- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- Project [k#x, v1#x, v2#x, k#x] - : : +- Join Inner, (k#x = k#x) - : : :- SubqueryAlias nt1 - : : : +- View (`nt1`, [k#x,v1#x]) - : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : : +- Project [k#x, v1#x] - : : : +- SubqueryAlias nt1 - : : : +- LocalRelation [k#x, v1#x] - : : +- SubqueryAlias nt2 - : : +- View (`nt2`, [k#x,v2#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : : +- Project [k#x, v2#x] - : : +- SubqueryAlias nt2 - : : +- LocalRelation [k#x, v2#x] - : +- SubqueryAlias nt3 - : +- View (`nt3`, [k#x,v3#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - : +- Project [k#x, v3#x] - : +- SubqueryAlias nt3 - : +- LocalRelation [k#x, v3#x] - +- SubqueryAlias nt4 - +- View (`nt4`, [k#x,v4#x]) - +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] - +- Project [k#x, v4#x] - +- SubqueryAlias nt4 - +- LocalRelation [k#x, v4#x] ++- Project [k#x, v1#x, v2#x, v3#x, v4#x, k#x, k#x, k#x] + +- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- Project [k#x, v1#x, v2#x, k#x] + : : +- Join Inner, (k#x = k#x) + : : :- SubqueryAlias nt1 + : : : +- View (`nt1`, [k#x,v1#x]) + : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : : +- Project [k#x, v1#x] + : 
: : +- SubqueryAlias nt1 + : : : +- LocalRelation [k#x, v1#x] + : : +- SubqueryAlias nt2 + : : +- View (`nt2`, [k#x,v2#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : : +- Project [k#x, v2#x] + : : +- SubqueryAlias nt2 + : : +- LocalRelation [k#x, v2#x] + : +- SubqueryAlias nt3 + : +- View (`nt3`, [k#x,v3#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + : +- Project [k#x, v3#x] + : +- SubqueryAlias nt3 + : +- LocalRelation [k#x, v3#x] + +- SubqueryAlias nt4 + +- View (`nt4`, [k#x,v4#x]) + +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] + +- Project [k#x, v4#x] + +- SubqueryAlias nt4 + +- LocalRelation [k#x, v4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index 23df9320925d..ead14bdd882d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -59,17 +59,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, dotNET#xL, Java#xL] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -79,15 +80,16 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] -+- Aggregate [course#x], 
[course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, 2012#xL, 2013#xL] ++- Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] + +- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -99,17 +101,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, dotNET_sum(earnings)#xL, dotNET_avg(earnings)#x, Java_sum(earnings)#xL, Java_avg(earnings)#x] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -121,17 +124,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] -+- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [dotNET#xL, Java#xL] ++- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] + +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -143,17 +147,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project 
[__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] -+- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [dotNET_sum(earnings)#xL, dotNET_min(year)#x, Java_sum(earnings)#xL, Java_min(year)#x] ++- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] + +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -167,24 +172,25 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] -+- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, 
sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, 1#xL, 2#xL] ++- Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] + +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -198,24 +204,25 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] - +- SubqueryAlias 
__auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, dotNET_sum(earnings)#xL, dotNET_min(s)#x, Java_sum(earnings)#xL, Java_min(s)#x] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -229,24 +236,25 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] - +- 
SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, dotNET#xL, Java#xL] ++- Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -258,17 +266,18 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x, c#x] -+- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- 
SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] ++- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x] + +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -280,17 +289,18 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x, c#x] -+- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, 
secondYear_a#x, c#x] ++- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x] + +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -363,17 +373,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, 
dotNET_CEIL(sum(earnings))#xL, dotNET_a1#x, Java_CEIL(sum(earnings))#xL, Java_a1#x] ++- Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -410,24 +421,25 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] -+- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] ++- 
Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] + +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -441,24 +453,25 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] -+- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, c1#xL, c2#xL] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] + +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS 
__pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -597,24 +610,25 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] -+- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, [1, 1]#xL, [2, 2]#xL] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] + +- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, 
cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -628,24 +642,25 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] -+- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] + +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -659,24 +674,25 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] -+- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, {1, a}#xL, {2, b}#xL] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] + +- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -690,24 +706,25 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] -+- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, 
named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] + +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -762,14 +779,15 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] -+- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w 
AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] ++- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index 497f36ff5644..c66326e020eb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -415,11 +415,12 @@ SELECT '' AS `xxx`, * FROM J1_TBL INNER JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Project [i#x, j#x, t#x, k#x] + +- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -427,11 +428,12 @@ SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Project [i#x, j#x, t#x, k#x] + +- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -458,11 +460,12 @@ SELECT '' AS `xxx`, * FROM J1_TBL NATURAL JOIN J2_TBL -- !query analysis Project [ AS 
xxx#x, i#x, j#x, t#x, k#x] -+- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Project [i#x, j#x, t#x, k#x] + +- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -470,15 +473,16 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS a#x, k#x AS d#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Project [a#x, b#x, c#x, d#x] + +- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS a#x, k#x AS d#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -486,15 +490,16 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS d#x, k#x AS a#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Project [a#x, b#x, c#x, d#x] + +- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS d#x, k#x AS a#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -568,11 +573,12 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Project [i#x, j#x, t#x, k#x] + +- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -580,11 +586,12 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias 
spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Project [i#x, j#x, t#x, k#x] + +- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -722,16 +729,17 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] -+- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, n#x, n#x, n#x] ++- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] + +- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -742,15 +750,16 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, n#x, n#x] + +- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -761,15 +770,16 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, n#x, n#x] + +- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -779,16 +789,17 @@ FULL JOIN (SELECT * FROM t3) s3 USING (name) -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] -+- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, 
n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, n#x, n#x] ++- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -798,15 +809,16 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -816,15 +828,16 @@ NATURAL LEFT JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -833,16 +846,17 @@ SELECT * FROM NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + 
: +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -854,21 +868,22 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -879,22 +894,23 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, 
n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -907,24 +923,25 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] -+- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, s1_n#x, s2_n#x, s3_n#x] ++- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -937,24 +954,25 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias 
spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index b0809ab8a9fc..e5f51cd80bf0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -733,16 +733,17 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] -+- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, n#x, n#x, n#x] ++- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] + +- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -753,15 +754,16 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, n#x, n#x] + +- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -772,15 +774,16 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, n#x, n#x] + +- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- 
Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -810,15 +813,16 @@ NATURAL INNER JOIN (SELECT udf(name) as name, udf(udf(n)) as s3_n, udf(3) as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -828,15 +832,16 @@ NATURAL LEFT JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ 
-845,16 +850,17 @@ SELECT * FROM NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(udf(n)) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -866,21 +872,22 @@ NATURAL INNER JOIN (SELECT udf(udf(udf(name))) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : 
+- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -891,22 +898,23 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, udf(3) as s3_2 FROM t3) s3 -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] + +- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -950,24 +958,25 @@ NATURAL FULL JOIN (SELECT name, udf(n) as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- 
Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index e248692cf7ed..08d11e0ac448 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -59,17 +59,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] -+- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [udf(year)#x, dotNET#xL, Java#xL] ++- Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] 
AS Java#xL] + +- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -79,15 +80,16 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] -+- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, 2012#xL, 2013#xL] ++- Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] + +- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -99,17 +101,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, 
__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(avg(earnings))#x, Java_udf(sum(earnings))#xL, Java_udf(avg(earnings))#x] ++- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -121,17 +124,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS 
BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] -+- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [dotNET#xL, Java#xL] ++- Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] + +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -143,17 +147,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] -+- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as 
string)) AS INT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [dotNET_udf(sum(udf(earnings)))#xL, dotNET_udf(min(year))#x, Java_udf(sum(udf(earnings)))#xL, Java_udf(min(year))#x] ++- Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] + +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -167,24 +172,25 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] -+- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x, s#x], 
[course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, 1#xL, 2#xL] ++- Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] + +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -198,24 +204,25 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS 
CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(min(s))#x, Java_udf(sum(earnings))#xL, Java_udf(min(s))#x] ++- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -229,24 +236,25 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] - +- 
Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, dotNET#xL, Java#xL] ++- Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -258,17 +266,18 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x, c#x] -+- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) 
AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] ++- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x] + +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -280,17 +289,18 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x, c#x] -+- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS 
a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] ++- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x] + +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -363,17 +373,18 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] -+- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS 
BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, dotNET_udf(CEIL(udf(sum(earnings))))#xL, dotNET_a1#x, Java_udf(CEIL(udf(sum(earnings))))#xL, Java_a1#x] ++- Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- 
!query @@ -410,24 +421,25 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] -+- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] ++- Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] + +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -441,24 +453,25 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] -+- Aggregate 
[year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, c1#xL, c2#xL] ++- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] + +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -537,24 +550,25 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] -+- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x 
= y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, [1, 1]#xL, [2, 2]#xL] ++- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] + +- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -568,24 +582,25 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] -+- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project 
[course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] ++- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] + +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -599,24 +614,25 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] -+- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, 
cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, {1, a}#xL, {2, b}#xL] ++- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] + +- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -630,24 +646,25 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] -+- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] ++- Project [year#x, 
__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] + +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -703,14 +720,15 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] -+- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] ++- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS 
`CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 5fa3d69d1f19..97410d3cdd36 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -39,19 +39,20 @@ CreateViewCommand `nt2`, select * from values SELECT * FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x, v2#x] + +- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -78,19 +79,20 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - 
+- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x, v2#x, k#x] + +- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -221,19 +223,20 @@ Project [k#x, k#x] SELECT * FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x] + +- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -260,19 +263,20 @@ Project [k#x] SELECT nt1.* FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x] + +- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -299,38 +303,40 @@ Project [k#x] SELECT k, nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x] + +- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, 
cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x, v2#x] + +- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -357,19 +363,20 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x, v2#x, k#x] + +- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -478,20 +485,21 @@ Project [k#x, k#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] -+- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [k#x, v1#x, v2#x] ++- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] + +- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -640,20 +648,21 @@ Project [k#x, k#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] -+- Join FullOuter, 
(k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [k#x, v1#x, v2#x] ++- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] + +- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -680,19 +689,20 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Project [k#x, v1#x, v2#x, k#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query From f58feca9d0abdfaf0b18488ab5a6b1ce1ea5871b Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 15 Dec 2023 23:21:21 -0800 Subject: [PATCH 090/129] SPARK-45959. reworked and simplified the code. 
Instead of Collapsing before analyse, now collapsing after analyze --- .../sql-tests/analyzer-results/pivot.sql.out | 46 +++++---- .../analyzer-results/postgreSQL/join.sql.out | 93 +++++++++---------- .../analyzer-results/udf/udf-pivot.sql.out | 46 +++++---- 3 files changed, 87 insertions(+), 98 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index ead14bdd882d..24441bf806a6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -124,18 +124,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -147,18 +146,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_sum(earnings)#xL, dotNET_min(year)#x, Java_sum(earnings)#xL, Java_min(year)#x] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] - +- Aggregate [pivotfirst(course#x, 
sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index c66326e020eb..c72ddedce53d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -415,12 +415,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL INNER JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query 
@@ -428,12 +427,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -460,12 +458,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL NATURAL JOIN J2_TBL -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -473,16 +470,15 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS a#x, k#x AS d#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS a#x, k#x AS d#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -490,16 +486,15 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS d#x, k#x AS a#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS d#x, k#x AS a#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -573,12 +568,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias 
spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -586,12 +580,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index 08d11e0ac448..eb8f87b3cf59 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -124,18 +124,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(course#x as string)) as string) AS 
course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -147,18 +146,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_udf(sum(udf(earnings)))#xL, dotNET_udf(min(year))#x, Java_udf(sum(udf(earnings)))#xL, Java_udf(min(year))#x] -+- Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS 
`CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query From da2cdcdada6128d74a866fdee594efe7009eb68e Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 16 Dec 2023 01:42:47 -0800 Subject: [PATCH 091/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../resources/query-tests/explain-results/crosstab.explain | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain index 0487d7360201..a30cd136e8db 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain @@ -1,4 +1,5 @@ Project [a_b#0] -+- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] - +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] - +- LocalRelation , [id#0L, a#0, b#0] ++- Project [a_b#0] + +- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] + +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] + +- LocalRelation , [id#0L, a#0, b#0] From 028993bbda519c97d9f8cdab2b603185974e7c78 Mon Sep 17 00:00:00 2001 From: ashahid Date: Sat, 16 Dec 2023 12:59:30 -0800 Subject: [PATCH 092/129] SPARK-45959. reworked and simplified the code. 
Instead of Collapsing before analyse, now collapsing after analyze --- .../connect/planner/SparkConnectPlanner.scala | 18 ++++++++++++------ .../catalyst/plans/logical/LogicalPlan.scala | 2 -- .../scala/org/apache/spark/sql/Dataset.scala | 6 +----- .../sql/internal/EarlyCollapseProject.scala | 2 -- .../spark/sql/EarlyCollapseProjectSuite.scala | 5 +++-- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 85e75c073a17..0cd6ee035f7b 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -985,12 +985,16 @@ class SparkConnectPlanner( val (colNames, newColNames) = rel.getRenamesList.asScala.toSeq.map { rename => (rename.getColName, rename.getNewColName) }.unzip - val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.withColumnsRenamed(colNames, newColNames).logicalPlan + Dataset + .ofRows(session, transformRelation(rel.getInput)) + .withColumnsRenamed(colNames, newColNames) + .logicalPlan } else { // for backward compatibility - val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.withColumnsRenamed(rel.getRenameColumnsMapMap).logicalPlan + Dataset + .ofRows(session, transformRelation(rel.getInput)) + .withColumnsRenamed(rel.getRenameColumnsMapMap) + .logicalPlan } } @@ -1011,8 +1015,10 @@ class SparkConnectPlanner( (alias.getName(0), Column(transformExpression(alias.getExpr)), metadata) }.unzip3 - val ds = Dataset.ofRows(session, transformRelation(rel.getInput)) - ds.withColumns(colNames, cols, metadata).logicalPlan + Dataset + .ofRows(session, transformRelation(rel.getInput)) + .withColumns(colNames, cols, metadata) + .logicalPlan } private def transformWithWatermark(rel: proto.WithWatermark): LogicalPlan = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 7daea8197432..cce385e8d9d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -198,8 +198,6 @@ object LogicalPlan { // to the old code path. 
private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id") private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col") - // For Testing - private[spark] val SKIP_EARLY_PROJECT_COLLAPSE = TreeNodeTag[Boolean]("skipEarlyProjectCollapse") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index c29ce4546860..4c13714fd0ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2949,9 +2949,7 @@ class Dataset[T] private[sql]( SchemaUtils.checkColumnNameDuplication( projectList.map(_.name), sparkSession.sessionState.conf.caseSensitiveAnalysis) - withPlan( - Project(projectList, logicalPlan) - ) + withPlan(Project(projectList, logicalPlan)) } /** @@ -4497,5 +4495,3 @@ class Dataset[T] private[sql]( toArrowBatchRdd(queryExecution.executedPlan) } } - - diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 99c535a93411..84041dcbacdd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -33,8 +33,6 @@ private[sql] object EarlyCollapseProject { def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = { logicalPlan match { case newP @ Project(newProjList, p @ Project(projList, child)) if - !p.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) && - !newP.getTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE).getOrElse(false) && p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && newProjList.size >= projList.size diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 7349275535dc..a6f5d9d4bb2f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -244,10 +244,11 @@ class EarlyCollapseProjectSuite extends QueryTest // then obtain optimized transformation which adds new project val logicalPlan = baseDf.logicalPlan val newDfUnopt = try { - logicalPlan.setTagValue[Boolean](LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE, true) + // add a plan id tag which will cause skipping of EarlyCollapseProject rule + logicalPlan.setTagValue[Long](LogicalPlan.PLAN_ID_TAG, 100L) transformation(baseDf) } finally { - logicalPlan.unsetTagValue(LogicalPlan.SKIP_EARLY_PROJECT_COLLAPSE) + logicalPlan.unsetTagValue(LogicalPlan.PLAN_ID_TAG) } (newDfOpt, newDfUnopt) } From 18836edf156207e8ac9ea59c2df8c2575af686fa Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 18 Dec 2023 15:12:27 -0800 Subject: [PATCH 093/129] SPARK-45959. reworked and simplified the code. 
Instead of Collapsing before analyse, now collapsing after analyze --- .../spark/sql/execution/CacheManager.scala | 29 ++- .../sql/internal/EarlyCollapseProject.scala | 185 ++++++------------ 2 files changed, 83 insertions(+), 131 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 711f35e657be..3503d702123d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -336,14 +336,23 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { var matchIndexInCdPlanProj = canonicalizedCdProj.projectList.indexWhere(_ == inComingNE) if (matchIndexInCdPlanProj == -1) { - // check if it is case of rename: - inComingNE match { - case Alias(attrx: AttributeReference, _) => - matchIndexInCdPlanProj = - canonicalizedCdProj.projectList.indexWhere(_ == attrx) - case Alias(childExpr, _) => matchIndexInCdPlanProj = - canonicalizedCdProj.projectList.indexWhere( - _.children.headOption.exists(_ == childExpr)) + // if match index is -1, that means it could be two possibilities: + // 1) it is a case of rename which means the incoming expr is an alias and + // its child is an attrib ref, which may have a direct attribref in the + // cdPlanProj, or it may actually have an alias whose ref matches the ref + // of incoming attribRef + // 2) the positions in the incoming project alias and the cdPlanProject are + // different. as a result the canonicalized alias of each would have + // relatively different exprIDs ( as their relative positions differ), but + // even in such cases as their child logical plans are same, so the child + // expression of each alias will have same canonicalized data + val incomingExprToCheck = inComingNE match { + case x: AttributeReference => x + case Alias(expr, _) => expr + } + matchIndexInCdPlanProj = canonicalizedCdProj.projectList.indexWhere { + case Alias(expr, _) => expr == incomingExprToCheck + case x => x == incomingExprToCheck } } index -> matchIndexInCdPlanProj @@ -372,8 +381,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { cdPlanProject.projectList(cdAttribIndex).toAttribute -> incomingProject.projectList(inAttribIndex).toAttribute }.toMap - if (cdAttribToInAttrib.size == cachedPlan.output.size && - transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { + + if (transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { val projectionToForceOnCdPlan = cachedPlan.output.map(cdAttribToInAttrib) val modifiedInProj = incomingProject.projectList.zipWithIndex.map { case (ne, indx) => if (incomingToCachedPlanIndxMapping.exists(_._1 == indx)) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 84041dcbacdd..34d7827d9af6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -18,132 +18,95 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} - -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression, WindowExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, 
AttributeReference, Expression, NamedExpression, UserDefinedExpression, WindowExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.types.{Metadata, MetadataBuilder} +import org.apache.spark.util.Utils private[sql] object EarlyCollapseProject { - object OpType extends Enumeration { - type OpType = Value - val AddNewColumnsOnly, RemapOnly, Unknown = Value - } - - def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = { + def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = logicalPlan match { case newP @ Project(newProjList, p @ Project(projList, child)) if p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && - newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty - && newProjList.size >= projList.size - => - val currentOutputAttribs = AttributeSet(p.output) + newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru - val (passThruAttribs, tinkeredOrNewNamedExprs) = newProjList.partition { + val (_, tinkeredOrNewNamedExprs) = newProjList.partition { case _: Attribute => true case _ => false } - val passThruAttribsContainedInCurrentOutput = passThruAttribs.forall(attribute => - currentOutputAttribs.contains(attribute) || - currentOutputAttribs.exists(_.name == attribute.name)) - val opType = identifyOp(passThruAttribs, currentOutputAttribs, tinkeredOrNewNamedExprs, - passThruAttribsContainedInCurrentOutput) - opType match { - case OpType.AddNewColumnsOnly => - // case of new columns being added only - val childOutput = child.outputSet - val attribsRemappedInProj = AttributeMap( - projList.flatMap(ne => ne match { - case _: AttributeReference => Seq.empty[(Attribute, Expression)] - - case al @ Alias(expr, _) => - if (childOutput.contains(al.toAttribute)) { - Seq(al.toAttribute -> expr) - } else { - Seq.empty[(Attribute, Expression)] - } - - case _ => Seq.empty[(Attribute, Expression)] - })) - - if (tinkeredOrNewNamedExprs.exists(_.collectFirst { - // we will not flatten if expressions contain windows or aggregate as if they - // are collapsed it can cause recalculation of functions and inefficiency with - // separate group by clauses - case ex if !ex.deterministic => ex - case ex: AggregateExpression => ex - case ex: WindowExpression => ex - case ex: UserDefinedExpression => ex - }.nonEmpty)) { - None - } else { - val remappedNewProjListResult = Try { - newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map( - transferMetadata(attr, _)).getOrElse(attr) - - case anyOtherExpr => - (anyOtherExpr transformUp { - case attr: AttributeReference => - attribsRemappedInProj.get(attr).orElse(projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - case al: Alias => al.child - case x => x - }).getOrElse(attr) - }).asInstanceOf[NamedExpression] - } - } - remappedNewProjListResult match { - case Success(remappedNewProjList) => Option(Project(remappedNewProjList, child)) + val childOutput = child.outputSet + val attribsRemappedInProj = AttributeMap( + projList.flatMap(ne => ne match { + case _: AttributeReference => Seq.empty[(Attribute, Expression)] - case Failure(_) => None + case al@Alias(expr, _) => + if (childOutput.contains(al.toAttribute)) { + Seq(al.toAttribute -> expr) + } else { + Seq.empty[(Attribute, Expression)] } - } - - case OpType.RemapOnly => - // case of 
renaming of columns - val remappedNewProjListResult = Try { - newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - // To Handle the case of change of case via toSchema - case al: Alias => if (attr.name == al.name) { - transferMetadata(attr, al) - } else { - transferMetadata(attr, al.copy(name = attr.name)( - exprId = al.exprId, qualifier = al.qualifier, - explicitMetadata = al.explicitMetadata, - nonInheritableMetadataKeys = al.nonInheritableMetadataKeys)) - } - }.get - case al@Alias(ar: AttributeReference, name) => - projList.find(_.toAttribute.canonicalized == ar.canonicalized).map { - - case alx: Alias => transferMetadata(al.toAttribute, - Alias(alx.child, name)(al.exprId, al.qualifier, - al.explicitMetadata, al.nonInheritableMetadataKeys)) - - case _: AttributeReference => al - }.get - } + case _ => Seq.empty[(Attribute, Expression)] + })) + + if (tinkeredOrNewNamedExprs.exists(_.collectFirst { + // we will not flatten if expressions contain windows or aggregate as if they + // are collapsed it can cause recalculation of functions and inefficiency with + // separate group by clauses + case ex if !ex.deterministic => ex + case ex: AggregateExpression => ex + case ex: WindowExpression => ex + case ex: UserDefinedExpression => ex + }.nonEmpty)) { + None + } else { + val remappedNewProjListResult = Try { + newProjList.map { + case attr: AttributeReference => projList.find( + _.toAttribute.canonicalized == attr.canonicalized).map { + case al: Alias => + if (attr.name == al.name) { + transferMetadata(attr, al) + } else { + // To Handle the case of change of (Caps/lowercase) via toSchema resulting + // in rename + transferMetadata(attr, al.copy(name = attr.name)( + exprId = al.exprId, qualifier = al.qualifier, + explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys)) + } + + case _: AttributeReference => attr + }.getOrElse(attr) + + case anyOtherExpr => + (anyOtherExpr transformUp { + case attr: AttributeReference => + attribsRemappedInProj.get(attr).orElse(projList.find( + _.toAttribute.canonicalized == attr.canonicalized).map { + case al: Alias => al.child + case x => x + }).getOrElse(attr) + }).asInstanceOf[NamedExpression] } - remappedNewProjListResult match { - case Success(remappedNewProjList) => Option(Project(remappedNewProjList, child)) + } + remappedNewProjListResult match { + case Success(remappedNewProjList) => Option(Project(remappedNewProjList, child)) - case Failure(_) => None + case Failure(x) => if (Utils.isTesting) { + throw x + } else { + None } - - case _ => None + } } case _ => None } - } private def transferMetadata(from: Attribute, to: NamedExpression): NamedExpression = if (from.metadata == Metadata.empty) { @@ -162,24 +125,4 @@ private[sql] object EarlyCollapseProject { exprId = attr.exprId, qualifier = from.qualifier) } } - - private def identifyOp( - passThruAttribs: Seq[NamedExpression], - currentOutputAttribs: AttributeSet, - tinkeredOrNewNamedExprs: Seq[NamedExpression], - passThruAttribsContainedInCurrentOutput: Boolean): OpType.OpType = { - - if (passThruAttribs.size == currentOutputAttribs.size && - passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.nonEmpty) { - OpType.AddNewColumnsOnly - } else if (passThruAttribs.size + tinkeredOrNewNamedExprs.size == currentOutputAttribs.size - && passThruAttribsContainedInCurrentOutput && tinkeredOrNewNamedExprs.forall { - case Alias(_: AttributeReference, _) => true - case _ => false 
- }) { - OpType.RemapOnly - } else { - OpType.Unknown - } - } } From aca2ef220c8a3cb9b10801d2aa8e2f017d2864ad Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 18 Dec 2023 15:33:42 -0800 Subject: [PATCH 094/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../spark/sql/internal/EarlyCollapseProject.scala | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 34d7827d9af6..b2b2c3faa071 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, Expression, NamedExpression, UserDefinedExpression, WindowExpression} -import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} + +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, Expression, NamedExpression, UserDefinedExpression} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project, Window} import org.apache.spark.sql.types.{Metadata, MetadataBuilder} import org.apache.spark.util.Utils @@ -29,7 +29,8 @@ private[sql] object EarlyCollapseProject { logicalPlan match { case newP @ Project(newProjList, p @ Project(projList, child)) if p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && - newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty => + newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + !child.isInstanceOf[Window] && !child.isInstanceOf[Aggregate] => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru @@ -58,8 +59,6 @@ private[sql] object EarlyCollapseProject { // are collapsed it can cause recalculation of functions and inefficiency with // separate group by clauses case ex if !ex.deterministic => ex - case ex: AggregateExpression => ex - case ex: WindowExpression => ex case ex: UserDefinedExpression => ex }.nonEmpty)) { None From 6fd6ae67d44c47cdbdcef18793f79b4b01bb42d8 Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 18 Dec 2023 16:08:22 -0800 Subject: [PATCH 095/129] SPARK-45959. reworked and simplified the code. 
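One guard stays in place: collapsing is still skipped when the inner Project
sits directly on a Window node, since merging projections across a window
can cause the window functions to be recomputed. A minimal sketch of a plan
shape the guard leaves alone (df, grp and ts are illustrative names):

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions.{col, row_number}

    val w = Window.partitionBy(col("grp")).orderBy(col("ts"))
    val ranked = df.withColumn("rn", row_number().over(w))
    // ranked's plan is Project [..., rn] over a Window node; a further
    // withColumn therefore keeps its own Project instead of merging into
    // the one directly above the Window:
    val flagged = ranked.withColumn("flag", col("rn") === 1)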
Instead of Collapsing before analyse, now collapsing after analyze --- .../apache/spark/sql/internal/EarlyCollapseProject.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index b2b2c3faa071..f8c6a63e3df1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -28,10 +28,9 @@ private[sql] object EarlyCollapseProject { def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = logicalPlan match { case newP @ Project(newProjList, p @ Project(projList, child)) if - p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && - newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && - !child.isInstanceOf[Window] && !child.isInstanceOf[Aggregate] => - + p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + !child.isInstanceOf[Window] => // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (_, tinkeredOrNewNamedExprs) = newProjList.partition { From af8dc7c16d87b948f122a9711f4822a161ff631a Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 19 Dec 2023 01:41:07 -0800 Subject: [PATCH 096/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../analysis/ColumnResolutionHelper.scala | 21 +- .../catalyst/plans/logical/LogicalPlan.scala | 8 +- .../spark/sql/execution/CacheManager.scala | 3 +- .../sql/internal/EarlyCollapseProject.scala | 13 +- .../column-resolution-aggregate.sql.out | 15 +- .../analyzer-results/natural-join.sql.out | 377 ++++----- .../sql-tests/analyzer-results/pivot.sql.out | 490 ++++++----- .../analyzer-results/postgreSQL/join.sql.out | 277 +++--- .../udf/postgreSQL/udf-join.sql.out | 219 +++-- .../analyzer-results/udf/udf-pivot.sql.out | 490 ++++++----- .../analyzer-results/using-join.sql.out | 793 +++++++++--------- 11 files changed, 1320 insertions(+), 1386 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index a90c61565039..cd68b21b06dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -59,24 +59,35 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { val (newExprs, newChild) = { // Resolving expressions against current plan. val maybeResolvedExprs = exprs.map(resolveExpressionByPlanOutput(_, u)) + // Recursively resolving expressions on the child of current plan. resolveExprsAndAddMissingAttrs(maybeResolvedExprs, u.child) } // If some attributes used by expressions are resolvable only on the rewritten child // plan, we need to add them into original projection. - lazy val missingAttrs = - (AttributeSet(newExprs) -- u.outputSet).intersect(newChild.outputSet) + lazy val (missingAttrsFromOutput, missingAttrsFromDroppedAttr) = { + val missing = (AttributeSet(newExprs) -- u.outputSet) + missing.intersect(newChild.outputSet) -> + missing.intersect(u.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). 
+ map(sq => AttributeSet(sq.map(_.toAttribute))).getOrElse(AttributeSet.empty)) + } u match { case p: Project => - val newProject = Project(p.projectList ++ missingAttrs, newChild) + val droppedNamedExprs = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). + getOrElse(Seq.empty) + val newProject = Project(p.projectList ++ missingAttrsFromOutput ++ + missingAttrsFromDroppedAttr.map(attr => + droppedNamedExprs.find(_.toAttribute == attr).get), newChild) newProject.copyTagsFrom(p) (newExprs, newProject) case a @ Aggregate(groupExprs, aggExprs, child) => - if (missingAttrs.forall(attr => groupExprs.exists(_.semanticEquals(attr)))) { + if (missingAttrsFromOutput.forall(attr => groupExprs.exists( + _.semanticEquals(attr)))) { // All the missing attributes are grouping expressions, valid case. (newExprs, - a.copy(aggregateExpressions = aggExprs ++ missingAttrs, child = newChild)) + a.copy(aggregateExpressions = aggExprs ++ missingAttrsFromOutput, + child = newChild)) } else { // Need to add non-grouping attributes, invalid case. (exprs, a) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index cce385e8d9d1..23dbc5e2440d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -132,6 +132,9 @@ abstract class LogicalPlan private[this] lazy val outputAttributes = AttributeSeq.fromNormalOutput(output) + private[this] lazy val droppedAttributes = this.getTagValue( + LogicalPlan.DROPPED_NAMED_EXPRESSIONS).map(_.map(_.toAttribute)).getOrElse(Seq.empty) + private[this] lazy val outputMetadataAttributes = AttributeSeq(metadataOutput) /** @@ -154,7 +157,8 @@ abstract class LogicalPlan nameParts: Seq[String], resolver: Resolver): Option[NamedExpression] = outputAttributes.resolve(nameParts, resolver) - .orElse(outputMetadataAttributes.resolve(nameParts, resolver)) + .orElse(outputMetadataAttributes.resolve(nameParts, resolver)).orElse( + droppedAttributes.resolve(nameParts, resolver)) /** * Given an attribute name, split it to name parts by dot, but @@ -198,6 +202,8 @@ object LogicalPlan { // to the old code path. 
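  // DROPPED_NAMED_EXPRESSIONS (added below) remembers the named expressions
  // that EarlyCollapseProject pruned while merging two Projects, so that
  // resolve() above and ColumnResolutionHelper can still reach a column that
  // only existed on the collapsed-away Project. A minimal sketch of the flow,
  // using the names from this patch:
  //
  //   val newProject = Project(remappedNewProjList, child)
  //   newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList)
  //   ...
  //   // during resolution the tag is consulted as a last fallback:
  //   plan.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS)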
private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id") private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col") + private[spark] val DROPPED_NAMED_EXPRESSIONS = + TreeNodeTag[Seq[NamedExpression]]("dropped_namedexprs") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 3503d702123d..39ecdf161a76 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -383,7 +383,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { }.toMap if (transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { - val projectionToForceOnCdPlan = cachedPlan.output.map(cdAttribToInAttrib) + val projectionToForceOnCdPlan = cachedPlan.output.flatMap(cdAttr => + cdAttribToInAttrib.get(cdAttr).map(Seq(_)).getOrElse(Seq.empty)) val modifiedInProj = incomingProject.projectList.zipWithIndex.map { case (ne, indx) => if (incomingToCachedPlanIndxMapping.exists(_._1 == indx)) { ne.toAttribute diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index f8c6a63e3df1..af87277e56ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.internal import scala.util.{Failure, Success, Try} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, Expression, NamedExpression, UserDefinedExpression} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project, Window} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Window} import org.apache.spark.sql.types.{Metadata, MetadataBuilder} import org.apache.spark.util.Utils @@ -93,7 +93,16 @@ private[sql] object EarlyCollapseProject { } } remappedNewProjListResult match { - case Success(remappedNewProjList) => Option(Project(remappedNewProjList, child)) + case Success(remappedNewProjList) => + val newProject = Project(remappedNewProjList, child) + val droppedNamedExprs = projList.filter(ne => + remappedNewProjList.forall(_.toAttribute != ne.toAttribute)) + val newDroppedList = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS).map( + _ ++ droppedNamedExprs).getOrElse(droppedNamedExprs) + if (newDroppedList.nonEmpty) { + newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList) + } + Option(newProject) case Failure(x) => if (Utils.isTesting) { throw x diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out index 0c213899b64e..1f301fe67598 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out @@ -114,14 +114,13 @@ org.apache.spark.sql.AnalysisException -- !query SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY lca -- !query analysis -Project [lca#x, (lca#x + 1) AS col#x] -+- Project [k#x, k#x AS lca#x] - +- Aggregate [k#x], [k#x] - +- SubqueryAlias v1 - +- View (`v1`, [a#x,b#x,k#x]) - +- Project [cast(a#x as int) AS a#x, 
cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] - +- SubqueryAlias t - +- LocalRelation [a#x, b#x, k#x] +Project [k#x AS lca#x, (k#x + 1) AS col#x] ++- Aggregate [k#x], [k#x] + +- SubqueryAlias v1 + +- View (`v1`, [a#x,b#x,k#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] + +- SubqueryAlias t + +- LocalRelation [a#x, b#x, k#x] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out index 41c8876a7d25..d01e2b7403ae 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out @@ -71,20 +71,19 @@ CreateViewCommand `nt4`, select * from values SELECT * FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -174,20 +173,19 @@ Aggregate [count(1) AS count(1)#xL] SELECT k FROM nt1 natural join nt2 -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -215,40 +213,38 @@ Project [k#x] SELECT nt1.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project 
[cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt2.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -299,60 +295,57 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT *, nt2.k FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 natural join nt2 -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - 
+- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -510,56 +503,54 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException SELECT * FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, v2#x, v3#x] -+- Project [k#x, v1#x, v2#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.* FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- 
Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query @@ -620,61 +611,59 @@ Project [k#x, v1#x, v2#x, k#x, v3#x] SELECT * FROM nt1 natural join nt2 join nt3 on nt2.k = nt3.k -- !query analysis Project [k#x, v1#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, v2#x, k#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x,v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x,v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x,v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x,v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x,v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x,v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.*, nt4.* FROM nt1 natural join nt2 natural join nt3 natural join nt4 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x, k#x, v4#x] -+- Project [k#x, v1#x, v2#x, v3#x, v4#x, k#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- Project [k#x, v1#x, v2#x, k#x] - : : +- Join Inner, (k#x = k#x) - : : :- SubqueryAlias nt1 - : : : +- View (`nt1`, [k#x,v1#x]) - : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : : +- Project [k#x, v1#x] - : : : +- SubqueryAlias nt1 - : : : +- LocalRelation [k#x, v1#x] - : : +- SubqueryAlias nt2 - : : +- View (`nt2`, [k#x,v2#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : : +- Project [k#x, v2#x] - : : +- SubqueryAlias nt2 - : : +- LocalRelation [k#x, v2#x] - : +- SubqueryAlias nt3 - : +- View (`nt3`, [k#x,v3#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - : +- Project [k#x, v3#x] - : +- SubqueryAlias nt3 - : +- LocalRelation [k#x, v3#x] - +- SubqueryAlias nt4 - +- View (`nt4`, [k#x,v4#x]) - +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS 
v4#x] - +- Project [k#x, v4#x] - +- SubqueryAlias nt4 - +- LocalRelation [k#x, v4#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- Project [k#x, v1#x, v2#x, k#x] + : : +- Join Inner, (k#x = k#x) + : : :- SubqueryAlias nt1 + : : : +- View (`nt1`, [k#x,v1#x]) + : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : : +- Project [k#x, v1#x] + : : : +- SubqueryAlias nt1 + : : : +- LocalRelation [k#x, v1#x] + : : +- SubqueryAlias nt2 + : : +- View (`nt2`, [k#x,v2#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : : +- Project [k#x, v2#x] + : : +- SubqueryAlias nt2 + : : +- LocalRelation [k#x, v2#x] + : +- SubqueryAlias nt3 + : +- View (`nt3`, [k#x,v3#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + : +- Project [k#x, v3#x] + : +- SubqueryAlias nt3 + : +- LocalRelation [k#x, v3#x] + +- SubqueryAlias nt4 + +- View (`nt4`, [k#x,v4#x]) + +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] + +- Project [k#x, v4#x] + +- SubqueryAlias nt4 + +- LocalRelation [k#x, v4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index 24441bf806a6..23df9320925d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS 
earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_avg(earnings)#x, Java_sum(earnings)#xL, Java_avg(earnings)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project 
[course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -170,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] ++- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -202,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_min(s)#x, Java_sum(earnings)#xL, Java_min(s)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -234,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS 
`sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -264,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS 
avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -287,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x 
as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -371,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_CEIL(sum(earnings))#xL, dotNET_a1#x, Java_CEIL(sum(earnings))#xL, Java_a1#x] -+- Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS 
CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -419,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- 
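
A note on reading these pivot hunks: each one replaces a pair of stacked Project nodes with a single Project whose column order matches the old outer node, while the two Aggregate nodes that implement the pivot are untouched. The new shape is easy to eyeball locally by printing the analyzed plan from a spark-shell; a minimal sketch, assuming a session named `spark` in which the courseSales and years temp views from pivot.sql have already been created:

  val df = spark.sql(
    """SELECT * FROM (
      |  SELECT course, year, earnings FROM courseSales
      |)
      |PIVOT (
      |  sum(earnings)
      |  FOR course IN ('dotNET', 'Java')
      |)""".stripMargin)
  // After the flattening, the tree starts with a single Project directly
  // over the two Aggregate nodes, as on the `+` side of these hunks.
  println(df.queryExecution.analyzed.treeString)
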
LocalRelation [y#x, s#x] -- !query @@ -451,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -608,25 +597,24 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, [1, 1]#xL, [2, 2]#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] - +- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS 
`sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] ++- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -640,25 +628,24 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, 
cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -672,25 +659,24 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, {1, a}#xL, {2, b}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias 
yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -704,25 +690,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, 
sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -777,15 +762,14 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] -+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS 
x#x, d AS d#x, w AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index c72ddedce53d..497f36ff5644 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -722,17 +722,16 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [name#x, n#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -743,16 +742,15 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -763,16 +761,15 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project 
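
The join.sql.out hunks in this file show the same flattening from the USING-join side: the outer Project that merely re-listed the attributes already produced by the coalesce-emitting Project beneath it is dropped. A toy sketch of why that layer is removable (plain Scala over a stand-in tree, not the actual Catalyst rule), assuming an outer projection is only dropped when it forwards exactly its child projection's output:

  final case class Proj(output: Seq[String], child: Option[Proj] = None)

  // Remove any projection layer that just forwards its child's output.
  def collapse(p: Proj): Proj = p.child.map(collapse) match {
    case Some(c) if p.output == c.output => c
    case other                           => p.copy(child = other)
  }

  // Mirrors the FULL JOIN hunk above: Project [name, n, n, n] sitting on
  // the coalesce-producing Project [name, n, n, n] becomes a single node.
  val plan = Proj(Seq("name", "n", "n", "n"),
    Some(Proj(Seq("name", "n", "n", "n"))))
  assert(collapse(plan).child.isEmpty)

The real condition is checked on expressions rather than output names; the sketch only illustrates that the extra node carries no information of its own.
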
[name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -782,17 +779,16 @@ FULL JOIN (SELECT * FROM t3) s3 USING (name) -- !query analysis -Project [name#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -802,16 +798,15 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -821,16 +816,15 @@ NATURAL LEFT JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -839,17 +833,16 @@ SELECT * FROM NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- 
SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -861,22 +854,21 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -887,23 +879,22 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [name#x, n#x 
AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -916,25 +907,24 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -947,25 +937,24 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project 
[coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index e5f51cd80bf0..b0809ab8a9fc 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -733,17 +733,16 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [name#x, n#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -754,16 +753,15 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -774,16 +772,15 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - : +- SubqueryAlias 
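
The udf variants below run the same join queries through scalar UDF wrappers; the flattening is indifferent to the extra casts those wrappers introduce, so the hunks are shape-for-shape the same as in join.sql.out. These .sql.out files are regenerated wholesale by the golden-file harness (SQLQueryTestSuite run with SPARK_GENERATE_GOLDEN_FILES=1 in the environment), not edited by hand, so reviewing them is mostly a matter of confirming the one-Project-fewer pattern everywhere. A quick spot check, assuming a spark-shell session `spark` with the t1/t2/t3 test tables in place:

  import org.apache.spark.sql.catalyst.plans.logical.Project

  val analyzed = spark.sql(
    "SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name)"
  ).queryExecution.analyzed
  // Expect one Project per join level and no extra top-level wrapper.
  println(analyzed.collect { case p: Project => p }.size)
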
spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -813,16 +810,15 @@ NATURAL INNER JOIN (SELECT udf(name) as name, udf(udf(n)) as s3_n, udf(3) as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -832,16 +828,15 @@ NATURAL LEFT JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation 
spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -850,17 +845,16 @@ SELECT * FROM NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(udf(n)) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -872,22 +866,21 @@ NATURAL INNER JOIN (SELECT udf(udf(udf(name))) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) 
as int) AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -898,23 +891,22 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, udf(3) as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -958,25 +950,24 @@ NATURAL FULL JOIN (SELECT name, udf(n) as s3_n 
FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index eb8f87b3cf59..e248692cf7ed 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [udf(year)#x, dotNET#xL, Java#xL] -+- Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- 
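
The udf-pivot.sql.out hunks below close out the pattern: the collapsed Project keeps the old outer node's column order, so the user-visible schema of every pivot query is unchanged and only the plan gets one level shallower. A one-line schema check, assuming a session `spark` with the courseSales view registered:

  val df = spark.sql(
    "SELECT * FROM courseSales PIVOT (sum(earnings) FOR year IN (2012, 2013))")
  // Unchanged by the flattening, e.g. struct<course:string,2012:bigint,2013:bigint>
  println(df.schema.simpleString)
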
SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query 
analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(avg(earnings))#x, Java_udf(sum(earnings))#xL, Java_udf(avg(earnings))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as 
int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -170,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] ++- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -202,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(min(s))#x, Java_udf(sum(earnings))#xL, Java_udf(min(s))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, 
__pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, 
[y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -234,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -264,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 
2012_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -287,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(sum(e) 
as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -371,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(CEIL(udf(sum(earnings))))#xL, dotNET_a1#x, Java_udf(CEIL(udf(sum(earnings))))#xL, Java_a1#x] -+- Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, 
__pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS 
`(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -419,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, 
[course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -451,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] 
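Note: every golden-file hunk in this patch follows the same pattern — the analyzed plans lose the extra Project that used to sit on top of the pivot/join output, leaving a single Project with the merged expression list. A minimal spark-shell sketch of how to observe that shape; the local session, the app name, and the sample rows are illustrative assumptions, not taken from the test suite:

    import org.apache.spark.sql.SparkSession

    // A minimal sketch, assuming a local SparkSession; the app name and
    // sample data are made up for illustration.
    val spark = SparkSession.builder().master("local[1]").appName("flatten-demo").getOrCreate()
    import spark.implicits._

    val base = Seq(("dotNET", 2012, 10000), ("Java", 2013, 30000))
      .toDF("course", "year", "earnings")

    // The added column references only attributes of the child plan, so the
    // analyzed plan should come out as a single Project over the relation
    // rather than Project(Project(relation)).
    val withExtra = base.withColumn("doubled", $"earnings" * 2)
    println(withExtra.queryExecution.analyzed)

The same collapse is what shrinks each expected plan in these hunks by one Project line.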
-- !query @@ -548,25 +537,24 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, [1, 1]#xL, [2, 2]#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] - +- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] ++- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -580,25 +568,24 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as 
string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -612,25 +599,24 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, {1, a}#xL, {2, b}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(s#x, 
CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -644,25 +630,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as 
string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x,year#x,earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x,year#x,earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x,a#x,m#x,s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -718,15 +703,14 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] -+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias 
__auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x,year#x,earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x,year#x,earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 97410d3cdd36..bd281d68813c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -39,80 +39,76 @@ CreateViewCommand `nt2`, select * from values SELECT * FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS 
k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -183,220 +179,209 @@ Sort [k#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project 
[k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + 
:- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.* FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project 
[k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, 
v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -446,120 +431,114 @@ Sort [key#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, 
cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 full outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 full outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -608,121 +587,115 @@ Sort [key#x ASC NULLS FIRST], true -- !query SELECT k, nt1.k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join 
FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) 
AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -772,40 +745,38 @@ Sort [key#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) 
AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query From 6b40399e02279ad3bee5ec83389d5728d099bd22 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 19 Dec 2023 13:20:33 -0800 Subject: [PATCH 097/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../analysis/ColumnResolutionHelper.scala | 18 +++++++++++------- .../sql/internal/EarlyCollapseProject.scala | 18 ++++++++++++------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index cd68b21b06dc..0c2d08371dad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -65,11 +65,14 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { } // If some attributes used by expressions are resolvable only on the rewritten child // plan, we need to add them into original projection. - lazy val (missingAttrsFromOutput, missingAttrsFromDroppedAttr) = { - val missing = (AttributeSet(newExprs) -- u.outputSet) - missing.intersect(newChild.outputSet) -> - missing.intersect(u.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). + val (missingAttrsFromOutput, missingAttrsFromDroppedAttr) = { + val missing1 = (AttributeSet(newExprs) -- u.outputSet) + val fulfilledFromOutput = missing1.intersect(newChild.outputSet) + val missing2 = missing1 -- fulfilledFromOutput + val fulfilledFromDroppedCol = missing2.intersect(u.getTagValue( + LogicalPlan.DROPPED_NAMED_EXPRESSIONS). 
map(sq => AttributeSet(sq.map(_.toAttribute))).getOrElse(AttributeSet.empty)) + fulfilledFromOutput -> fulfilledFromDroppedCol } u match { case p: Project => @@ -77,13 +80,14 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { getOrElse(Seq.empty) val newProject = Project(p.projectList ++ missingAttrsFromOutput ++ missingAttrsFromDroppedAttr.map(attr => - droppedNamedExprs.find(_.toAttribute == attr).get), newChild) + droppedNamedExprs.find(_.toAttribute.canonicalized == attr.canonicalized).get), + newChild) newProject.copyTagsFrom(p) (newExprs, newProject) case a @ Aggregate(groupExprs, aggExprs, child) => - if (missingAttrsFromOutput.forall(attr => groupExprs.exists( - _.semanticEquals(attr)))) { + if (missingAttrsFromOutput.forall(attr => + groupExprs.exists(_.semanticEquals(attr)))) { // All the missing attributes are grouping expressions, valid case. (newExprs, a.copy(aggregateExpressions = aggExprs ++ missingAttrsFromOutput, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index af87277e56ab..f620ec96ea9d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -43,9 +43,9 @@ private[sql] object EarlyCollapseProject { projList.flatMap(ne => ne match { case _: AttributeReference => Seq.empty[(Attribute, Expression)] - case al@Alias(expr, _) => - if (childOutput.contains(al.toAttribute)) { - Seq(al.toAttribute -> expr) + case al@Alias(attr: AttributeReference, _) => + if (childOutput.contains(attr)) { + Seq(al.toAttribute -> transferMetadata(al.toAttribute, attr)) } else { Seq.empty[(Attribute, Expression)] } @@ -87,7 +87,7 @@ private[sql] object EarlyCollapseProject { attribsRemappedInProj.get(attr).orElse(projList.find( _.toAttribute.canonicalized == attr.canonicalized).map { case al: Alias => al.child - case x => x + case x => attr }).getOrElse(attr) }).asInstanceOf[NamedExpression] } @@ -97,8 +97,14 @@ private[sql] object EarlyCollapseProject { val newProject = Project(remappedNewProjList, child) val droppedNamedExprs = projList.filter(ne => remappedNewProjList.forall(_.toAttribute != ne.toAttribute)) - val newDroppedList = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS).map( - _ ++ droppedNamedExprs).getOrElse(droppedNamedExprs) + val prevDroppedColsPart1 = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). + getOrElse(Seq.empty) + // remove any attribs which have been added back in the new project list + val prevDroppedColsPart2 = prevDroppedColsPart1.filterNot(x => + remappedNewProjList.exists(y => y.toAttribute == x.toAttribute || y.name == x.name)) + val prevDroppedColsFinal = prevDroppedColsPart2.filterNot(x => + droppedNamedExprs.exists(y => y == x || y.name == x.name)) + val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal if (newDroppedList.nonEmpty) { newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList) } From 4392ac909e85fe4c0d8264c026b5011092c7b7dd Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 19 Dec 2023 13:56:23 -0800 Subject: [PATCH 098/129] SPARK-45959. reworked and simplified the code. 
Instead of Collapsing before analyse, now collapsing after analyze --- .../query-tests/explain-results/crosstab.explain | 7 +++---- .../query-tests/explain-results/describe.explain | 9 ++++----- .../query-tests/explain-results/summary.explain | 7 +++---- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain index a30cd136e8db..0487d7360201 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain @@ -1,5 +1,4 @@ Project [a_b#0] -+- Project [a_b#0] - +- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] - +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] - +- LocalRelation , [id#0L, a#0, b#0] ++- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] + +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] + +- LocalRelation , [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain index b203f715c71a..2e20694bd784 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain @@ -1,6 +1,5 @@ Project [summary#0, element_at(id#0, summary#0, None, false) AS id#0, element_at(b#0, summary#0, None, false) AS b#0] -+- Project [id#0, b#0, summary#0] - +- Generate explode([count,mean,stddev,min,max]), false, [summary#0] - +- Aggregate [map(cast(count as string), cast(count(id#0L) as string), cast(mean as string), cast(avg(id#0L) as string), cast(stddev as string), cast(stddev(cast(id#0L as double)) as string), cast(min as string), cast(min(id#0L) as string), cast(max as string), cast(max(id#0L) as string)) AS id#0, map(cast(count as string), cast(count(b#0) as string), cast(mean as string), cast(avg(b#0) as string), cast(stddev as string), cast(stddev(b#0) as string), cast(min as string), cast(min(b#0) as string), cast(max as string), cast(max(b#0) as string)) AS b#0] - +- Project [id#0L, b#0] - +- LocalRelation , [id#0L, a#0, b#0] ++- Generate explode([count,mean,stddev,min,max]), false, [summary#0] + +- Aggregate [map(cast(count as string), cast(count(id#0L) as string), cast(mean as string), cast(avg(id#0L) as string), cast(stddev as string), cast(stddev(cast(id#0L as double)) as string), cast(min as string), cast(min(id#0L) as string), cast(max as string), cast(max(id#0L) as string)) AS id#0, 
map(cast(count as string), cast(count(b#0) as string), cast(mean as string), cast(avg(b#0) as string), cast(stddev as string), cast(stddev(b#0) as string), cast(min as string), cast(min(b#0) as string), cast(max as string), cast(max(b#0) as string)) AS b#0] + +- Project [id#0L, b#0] + +- LocalRelation , [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain index 3ce8a26f1383..f75ef6c0885f 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain @@ -1,5 +1,4 @@ Project [summary#0, element_at(id#0, summary#0, None, false) AS id#0, element_at(a#0, summary#0, None, false) AS a#0, element_at(b#0, summary#0, None, false) AS b#0] -+- Project [id#0, a#0, b#0, summary#0] - +- Generate explode([mean,min]), false, [summary#0] - +- Aggregate [map(cast(mean as string), cast(avg(id#0L) as string), cast(min as string), cast(min(id#0L) as string)) AS id#0, map(cast(mean as string), cast(avg(a#0) as string), cast(min as string), cast(min(a#0) as string)) AS a#0, map(cast(mean as string), cast(avg(b#0) as string), cast(min as string), cast(min(b#0) as string)) AS b#0] - +- LocalRelation , [id#0L, a#0, b#0] ++- Generate explode([mean,min]), false, [summary#0] + +- Aggregate [map(cast(mean as string), cast(avg(id#0L) as string), cast(min as string), cast(min(id#0L) as string)) AS id#0, map(cast(mean as string), cast(avg(a#0) as string), cast(min as string), cast(min(a#0) as string)) AS a#0, map(cast(mean as string), cast(avg(b#0) as string), cast(min as string), cast(min(b#0) as string)) AS b#0] + +- LocalRelation , [id#0L, a#0, b#0] From 9660cd39ccaf5fc02d3e9ad4bdc918e369a590ff Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 19 Dec 2023 16:06:25 -0800 Subject: [PATCH 099/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../scala/org/apache/spark/sql/Dataset.scala | 3 ++- .../spark/sql/execution/CacheManager.scala | 25 +++++++++++++------ .../spark/sql/execution/command/tables.scala | 5 ++-- .../datasources/v2/DataSourceV2Strategy.scala | 5 ++-- .../command/AlterTableRenameSuiteBase.scala | 4 ++- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 4c13714fd0ab..1b7bbc44c9d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3908,7 +3908,8 @@ class Dataset[T] private[sql]( */ def storageLevel: StorageLevel = { sparkSession.sharedState.cacheManager.lookupCachedData(this).map { cachedData => - cachedData.cachedRepresentation.toOption.get.cacheBuilder.storageLevel + cachedData.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity). 
+ cacheBuilder.storageLevel }.getOrElse(StorageLevel.NONE) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 39ecdf161a76..1fc55c650d22 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -75,7 +75,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { /** Clears all cached tables. */ def clearCache(): Unit = this.synchronized { - cachedData.foreach(_.cachedRepresentation.toOption.get.cacheBuilder.clearCache()) + cachedData.foreach(_.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity) + .cacheBuilder.clearCache()) cachedData = IndexedSeq[CachedData]() } @@ -220,7 +221,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { this.synchronized { cachedData = cachedData.filterNot(cd => plansToUncache.exists(_ eq cd)) } - plansToUncache.foreach { _.cachedRepresentation.toOption.get.cacheBuilder.clearCache(blocking) } + plansToUncache.foreach { _.cachedRepresentation. + fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder.clearCache(blocking) } // Re-compile dependent cached queries after removing the cached query. if (!cascade) { @@ -237,7 +239,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { // 2) The buffer has been cleared, but `isCachedColumnBuffersLoaded` returns true, then we // will keep it as it is. It means the physical plan has been re-compiled already in the // other thread. - val cacheAlreadyLoaded = cd.cachedRepresentation.toOption.get.cacheBuilder. + val cacheAlreadyLoaded = cd.cachedRepresentation. + fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder. isCachedColumnBuffersLoaded cd.plan.exists(isMatchedPlan) && !cacheAlreadyLoaded }) @@ -251,8 +254,9 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { column: Seq[Attribute]): Unit = { val relation = cachedData.cachedRepresentation val (rowCount, newColStats) = - CommandUtils.computeColumnStats(sparkSession, relation.toOption.get, column) - relation.toOption.get.updateStats(rowCount, newColStats) + CommandUtils.computeColumnStats(sparkSession, relation.merge, column) + relation.fold(CacheManager.inMemoryRelationExtractor, identity). + updateStats(rowCount, newColStats) } /** @@ -274,11 +278,13 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { cachedData = cachedData.filterNot(cd => needToRecache.exists(_ eq cd)) } needToRecache.foreach { cd => - cd.cachedRepresentation.toOption.get.cacheBuilder.clearCache() + cd.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity). + cacheBuilder.clearCache() val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark) val newCache = sessionWithConfigsOff.withActive { val qe = sessionWithConfigsOff.sessionState.executePlan(cd.plan) - InMemoryRelation(cd.cachedRepresentation.toOption.get.cacheBuilder, qe) + InMemoryRelation(cd.cachedRepresentation. 
+ fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder, qe) } val recomputedPlan = cd.copy(cachedRepresentation = Right(newCache)) this.synchronized { @@ -532,3 +538,8 @@ private case class Replaceable(attribToUse: Attribute) extends LeafExpression { throw new UnsupportedOperationException() override def dataType: DataType = attribToUse.dataType } + +object CacheManager { + val inMemoryRelationExtractor = (plan: LogicalPlan) => plan.collectLeaves().head. + asInstanceOf[InMemoryRelation] +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index fc6b6e942b0e..7d0c907d46ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeed import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -201,8 +202,8 @@ case class AlterTableRenameCommand( // If `optStorageLevel` is defined, the old table was cached. val optCachedData = sparkSession.sharedState.cacheManager.lookupCachedData( sparkSession.table(oldName.unquotedString)) - val optStorageLevel = optCachedData.map(_.cachedRepresentation.toOption.get.cacheBuilder. - storageLevel) + val optStorageLevel = optCachedData.map(_.cachedRepresentation. 
+ fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder.storageLevel) if (optStorageLevel.isDefined) { CommandUtils.uncacheTableOrView(sparkSession, oldName.unquotedString) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 6cb963f1e8bb..925ca11110aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.connector.read.LocalScan import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} -import org.apache.spark.sql.execution.{FilterExec, InSubqueryExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.{CacheManager, FilterExec, InSubqueryExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, LogicalRelation, PushableColumnAndNestedColumn} import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH @@ -86,7 +86,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat val cache = session.sharedState.cacheManager.lookupCachedData(v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) if (cache.exists(_.cachedRepresentation.isRight)) { - val cacheLevel = cache.get.cachedRepresentation.toOption.get.cacheBuilder.storageLevel + val cacheLevel = cache.get.cachedRepresentation. + fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder.storageLevel Some(cacheLevel) } else { None diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala index 0d7c336f59fc..3a87343efb34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command +import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.storage.StorageLevel @@ -73,7 +74,8 @@ trait AlterTableRenameSuiteBase extends QueryTest with DDLCommandTestUtils { def getStorageLevel(tableName: String): StorageLevel = { val table = spark.table(tableName) val cachedData = spark.sharedState.cacheManager.lookupCachedData(table).get - cachedData.cachedRepresentation.toOption.get.cacheBuilder.storageLevel + cachedData.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity). + cacheBuilder.storageLevel } sql(s"CREATE TABLE $src (c0 INT) $defaultUsing") sql(s"INSERT INTO $src SELECT 0") From f2e7e8d54e3fd78298449dc97e2ff4459333d3dd Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 19 Dec 2023 16:44:42 -0800 Subject: [PATCH 100/129] SPARK-45959. reworked and simplified the code. 
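 Instead of collapsing before analysis, now collapsing after analysis.

This is an import-ordering fixup for the previous commit, which made
CachedData.cachedRepresentation an Either and replaced the unsafe
toOption.get calls with a fold. A minimal sketch of that extraction
pattern, in plain Scala with illustrative stand-in types rather than the
real Spark classes:

    object FoldExtractionSketch {
      // Left wraps the cached relation inside a larger plan; Right holds the
      // relation directly. One fold covers both sides without Option.get.
      sealed trait Plan { def leaves: Seq[Plan] = Seq(this) }
      case class Wrapper(child: Plan) extends Plan {
        override def leaves: Seq[Plan] = child.leaves
      }
      case class InMemory(storageLevel: String) extends Plan

      // Analogous to CacheManager.inMemoryRelationExtractor: take the leaf of
      // the wrapping plan and treat it as the cached relation.
      val extractor: Plan => InMemory = p => p.leaves.head.asInstanceOf[InMemory]

      def storageLevel(rep: Either[Plan, InMemory]): String =
        rep.fold(extractor, identity).storageLevel

      def main(args: Array[String]): Unit = {
        assert(storageLevel(Right(InMemory("MEMORY_AND_DISK"))) == "MEMORY_AND_DISK")
        assert(storageLevel(Left(Wrapper(InMemory("MEMORY_ONLY")))) == "MEMORY_ONLY")
      }
    }
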
---
 .../spark/sql/execution/command/AlterTableRenameSuiteBase.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala
index 3a87343efb34..d872fdac5cfb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala
@@ -17,8 +17,8 @@
 
 package org.apache.spark.sql.execution.command
 
-import org.apache.spark.sql.execution.CacheManager
 import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+import org.apache.spark.sql.execution.CacheManager
 import org.apache.spark.storage.StorageLevel
 
 /**

From be8cf1edb94a397dcf855fb3ccd38be59631783d Mon Sep 17 00:00:00 2001
From: ashahid
Date: Wed, 20 Dec 2023 00:03:31 -0800
Subject: [PATCH 101/129] SPARK-45959. reworked and simplified the code.
 Instead of collapsing before analysis, now collapsing after analysis.

---
 .../spark/sql/execution/CacheManager.scala | 31 +++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
index 1fc55c650d22..475fec0ef87f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
@@ -364,6 +364,23 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
         index -> matchIndexInCdPlanProj
       }.partition(_._2 != -1)
 
+      // Now there is a possible case where a literal is present in the IMR as an
+      // attribute and the incoming project also has that literal somewhere in an alias.
+      // Even though we do not need to read it, the deserializer fails if we skip that
+      // literal in the projection enforced on the IMR. So in effect, even if we do not
+      // require an attribute, it still needs to be present in the forced projection.
+      // It is also possible that some attribute from the IMR is used in a subexpression
+      // of the incoming projection, so we have to handle that case as well.
+      val unusedAttribsOfCDPlanToGenIncomingAttr =
+        cdPlanProject.projectList.indices.filterNot(i =>
+          incomingToCachedPlanIndxMapping.exists(_._2 == i)).map(i => {
+          val cdAttrib = cdPlanProject.projectList(i)
+          i -> AttributeReference(cdAttrib.name, cdAttrib.dataType,
+            cdAttrib.nullable, cdAttrib.metadata)(qualifier = cdAttrib.qualifier)
+        })
+
+
+
       // If expressions of inComingProjNoDirectMapping can be expressed in terms of the
      // incoming attribute refs or incoming alias exprs, which can be mapped directly
      // to the CachedPlan's output, we are good. so lets transform such indirectly
@@ -376,7 +393,11 @@
         val ne = incomingProject.projectList(incomngIndex)
         val modifiedNe = ne.transformDown {
           case expr => directlyMappedIncomingProjs.find(ne => ne.toAttribute == expr
-            || ne.children.headOption.contains(expr)).
+            || ne.children.headOption.contains(expr)).orElse(
+            unusedAttribsOfCDPlanToGenIncomingAttr.find {
+              case (i, _) => val cdNe = canonicalizedCdProj.projectList(i)
+                cdNe.children.headOption.contains(expr.canonicalized)
+            }.map(_._2)).
map(ne => Replaceable(ne.toAttribute)).getOrElse(expr) }.asInstanceOf[NamedExpression] incomngIndex -> modifiedNe @@ -389,8 +410,12 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { }.toMap if (transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { - val projectionToForceOnCdPlan = cachedPlan.output.flatMap(cdAttr => - cdAttribToInAttrib.get(cdAttr).map(Seq(_)).getOrElse(Seq.empty)) + val projectionToForceOnCdPlan = cachedPlan.output.zipWithIndex.map { + case (cdAttr, i) => + cdAttribToInAttrib.getOrElse(cdAttr, + unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) + } + val modifiedInProj = incomingProject.projectList.zipWithIndex.map { case (ne, indx) => if (incomingToCachedPlanIndxMapping.exists(_._1 == indx)) { ne.toAttribute From 0d9ea2add08deced076405207509278ec33d4b88 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 20 Dec 2023 13:02:49 -0800 Subject: [PATCH 102/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../apache/spark/sql/internal/EarlyCollapseProject.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index f620ec96ea9d..7569b202f182 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -53,7 +53,7 @@ private[sql] object EarlyCollapseProject { case _ => Seq.empty[(Attribute, Expression)] })) - if (tinkeredOrNewNamedExprs.exists(_.collectFirst { + if ((tinkeredOrNewNamedExprs ++ p.projectList).exists(_.collectFirst { // we will not flatten if expressions contain windows or aggregate as if they // are collapsed it can cause recalculation of functions and inefficiency with // separate group by clauses @@ -105,6 +105,7 @@ private[sql] object EarlyCollapseProject { val prevDroppedColsFinal = prevDroppedColsPart2.filterNot(x => droppedNamedExprs.exists(y => y == x || y.name == x.name)) val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal + newProject.copyTagsFrom(p) if (newDroppedList.nonEmpty) { newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList) } @@ -129,8 +130,7 @@ private[sql] object EarlyCollapseProject { case al: Alias => val newMdBuilder = new MetadataBuilder().withMetadata(from.metadata) val newMd = newMdBuilder.build() - - al.copy()(exprId = al.exprId, qualifier = al.qualifier, + al.copy()(exprId = al.exprId, qualifier = from.qualifier, nonInheritableMetadataKeys = al.nonInheritableMetadataKeys, explicitMetadata = Option(newMd)) From 49c4f4ccc50c4f7bcb7bbbf5f9111d2b4f2d6759 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 20 Dec 2023 16:14:10 -0800 Subject: [PATCH 103/129] SPARK-45959. reworked and simplified the code. 
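 Instead of collapsing before analysis, now collapsing after analysis.

This extends the early collapse to a Project sitting on top of a chain of
Filters that ends in another Project: when every filter condition only
references attributes that pass through the new projection unchanged, the
top Project can be merged into the bottom one and the Filters rebuilt on
top of the collapsed node. A minimal sketch of the plan shape this targets,
assuming a local SparkSession and made-up column names, for illustration
only:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.upper

    object EarlyCollapseShapeSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[1]").getOrCreate()
        import spark.implicits._

        val base = Seq((1, "a"), (2, "b")).toDF("k", "v")
        // Builds Project <- Filter(k > 0) <- Project <- LocalRelation; the
        // filter references only k, which the outer projection passes through
        // as a plain attribute, so the two Projects are collapsible.
        val df = base.select($"k", $"v")
          .where($"k" > 0)
          .select($"k", $"v", upper($"v").as("u"))
        df.explain(true)
        spark.stop()
      }
    }
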
---
 .../sql/internal/EarlyCollapseProject.scala   | 227 +++++++++++-------
 .../analyzer-results/natural-join.sql.out     |  93 ++++---
 .../analyzer-results/postgreSQL/join.sql.out  |  30 ++-
 .../udf/udf-natural-join.sql.out              |  31 ++-
 4 files changed, 212 insertions(+), 169 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala
index 7569b202f182..568951b3b485 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala
@@ -17,111 +17,66 @@
 
 package org.apache.spark.sql.internal
 
+import scala.collection.mutable
 import scala.util.{Failure, Success, Try}
 
-import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, Expression, NamedExpression, UserDefinedExpression}
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Window}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, UnaryNode, Window}
 import org.apache.spark.sql.types.{Metadata, MetadataBuilder}
 import org.apache.spark.util.Utils
 
+
 private[sql] object EarlyCollapseProject {
   def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = logicalPlan match {
-    case newP @ Project(newProjList, p @ Project(projList, child)) if
-      p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty &&
-      newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty &&
-      !child.isInstanceOf[Window] =>
-      // In the new column list identify those Named Expressions which are just attributes and
-      // hence pass thru
-      val (_, tinkeredOrNewNamedExprs) = newProjList.partition {
-        case _: Attribute => true
-        case _ => false
-      }
-
-      val childOutput = child.outputSet
-      val attribsRemappedInProj = AttributeMap(
-        projList.flatMap(ne => ne match {
-          case _: AttributeReference => Seq.empty[(Attribute, Expression)]
+    case newP @ Project(newProjList, p @ Project(projList, child))
+      if checkEarlyCollapsePossible(p, newP, child) =>
+      collapseProjectEarly(newProjList, p, projList, child)
 
-          case al@Alias(attr: AttributeReference, _) =>
-            if (childOutput.contains(attr)) {
-              Seq(al.toAttribute -> transferMetadata(al.toAttribute, attr))
-            } else {
-              Seq.empty[(Attribute, Expression)]
-            }
+    case newP @ Project(newProjList, f @ Filter(_, filterChild: UnaryNode)) =>
+      // Check whether this is a chain of nested Filters that ends in a Project
+      val filterNodes = mutable.ListBuffer(f)
+      var projectAtEnd: Option[Project] = None
+      var keepGoing = true
+      var currentChild = filterChild
+      while (keepGoing) {
+        currentChild match {
+          case p: Project => projectAtEnd = Option(p)
+            keepGoing = false
+          case filter @ Filter(_, u: UnaryNode) =>
+            filterNodes += filter
+            currentChild = u
+          case _ => keepGoing = false
+        }
+      }
-
-          case _ => Seq.empty[(Attribute, Expression)]
-        }))
-
-      if ((tinkeredOrNewNamedExprs ++ p.projectList).exists(_.collectFirst {
-        // we will not flatten if expressions contain windows or aggregate as if they
-        // are collapsed it can cause recalculation of functions and inefficiency with
-        // separate group by clauses
-        case ex if !ex.deterministic => ex
-        case ex: UserDefinedExpression => ex
-      }.nonEmpty)) {
-        None
-      } else {
-        val remappedNewProjListResult = Try {
-          newProjList.map {
-            case attr: AttributeReference => projList.find(
-              _.toAttribute.canonicalized == attr.canonicalized).map {
-              case al: Alias =>
-                if (attr.name == al.name) {
-                  transferMetadata(attr, al)
-                } else {
-                  // To Handle the case of change of (Caps/lowercase) via toSchema resulting
-                  // in rename
-                  transferMetadata(attr, al.copy(name = attr.name)(
-                    exprId = al.exprId, qualifier = al.qualifier,
-                    explicitMetadata = al.explicitMetadata,
-                    nonInheritableMetadataKeys = al.nonInheritableMetadataKeys))
-                }
-
-              case _: AttributeReference => attr
-            }.getOrElse(attr)
-
-            case anyOtherExpr =>
-              (anyOtherExpr transformUp {
-                case attr: AttributeReference =>
-                  attribsRemappedInProj.get(attr).orElse(projList.find(
-                    _.toAttribute.canonicalized == attr.canonicalized).map {
-                    case al: Alias => al.child
-                    case x => attr
-                  }).getOrElse(attr)
-              }).asInstanceOf[NamedExpression]
-          }
-        }
-        remappedNewProjListResult match {
-          case Success(remappedNewProjList) =>
-            val newProject = Project(remappedNewProjList, child)
-            val droppedNamedExprs = projList.filter(ne =>
-              remappedNewProjList.forall(_.toAttribute != ne.toAttribute))
-            val prevDroppedColsPart1 = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS).
-              getOrElse(Seq.empty)
-            // remove any attribs which have been added back in the new project list
-            val prevDroppedColsPart2 = prevDroppedColsPart1.filterNot(x =>
-              remappedNewProjList.exists(y => y.toAttribute == x.toAttribute || y.name == x.name))
-            val prevDroppedColsFinal = prevDroppedColsPart2.filterNot(x =>
-              droppedNamedExprs.exists(y => y == x || y.name == x.name))
-            val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal
-            newProject.copyTagsFrom(p)
-            if (newDroppedList.nonEmpty) {
-              newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList)
-            }
-            Option(newProject)
-
-          case Failure(x) => if (Utils.isTesting) {
-            throw x
-          } else {
-            None
-          }
-        }
-      }
+      if (projectAtEnd.isDefined &&
+        filterNodes.map(_.condition.references).reduce(_ ++ _).
+          subsetOf(
+            AttributeSet(newProjList.filter(_.isInstanceOf[AttributeReference])
+              .map(_.toAttribute)))) {
+        val p = projectAtEnd.get
+        val child = p.child
+        if (checkEarlyCollapsePossible(p, newP, child)) {
+          val newProjOpt = collapseProjectEarly(newProjList, p, p.projectList, child)
+          newProjOpt.map(collapsedProj => {
+            val lastFilterMod = filterNodes.last.copy(child = collapsedProj)
+            filterNodes.dropRight(1).foldRight(lastFilterMod)((f, c) => f.copy(child = c))
+          })
+        } else {
+          None
+        }
+      } else {
+        None
+      }
 
     case _ => None
   }
 
+  private def checkEarlyCollapsePossible(p: Project, newP: Project, child: LogicalPlan): Boolean =
+    p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty &&
+      newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty &&
+      !child.isInstanceOf[Window]
+
   private def transferMetadata(from: Attribute, to: NamedExpression): NamedExpression =
     if (from.metadata == Metadata.empty) {
       to
     } else {
       to match {
         case al: Alias =>
           val newMdBuilder = new MetadataBuilder().withMetadata(from.metadata)
           val newMd = newMdBuilder.build()
           al.copy()(exprId = al.exprId, qualifier = from.qualifier,
             nonInheritableMetadataKeys = al.nonInheritableMetadataKeys,
             explicitMetadata = Option(newMd))
 
         case attr: AttributeReference =>
           val newMdBuilder = new MetadataBuilder().withMetadata(from.metadata)
           attr.copy(metadata = newMdBuilder.build())(
             exprId = attr.exprId, qualifier = from.qualifier)
       }
     }
+
+  def collapseProjectEarly(
+      newProjList: Seq[NamedExpression],
+      p: Project,
+      projList: Seq[NamedExpression],
+      child: LogicalPlan): Option[Project] = {
+    // In the new column list, identify the named expressions which are just attributes
+    // and hence pass through unchanged
+    val (_, tinkeredOrNewNamedExprs) = newProjList.partition {
+      case _: Attribute => true
+      case _ => false
+    }
+
+    val childOutput = child.outputSet
+    val attribsRemappedInProj = AttributeMap(
+      projList.flatMap(ne => ne match {
+        case _: AttributeReference => Seq.empty[(Attribute, Expression)]
+
+        case al@Alias(attr: AttributeReference, _) =>
+          if (childOutput.contains(attr)) {
+            Seq(al.toAttribute -> transferMetadata(al.toAttribute, attr))
+          } else {
+            Seq.empty[(Attribute, Expression)]
+          }
+
+        case _ => Seq.empty[(Attribute, Expression)]
+      }))
+
+    if ((tinkeredOrNewNamedExprs ++ p.projectList).exists(_.collectFirst {
+      // we will not flatten if the expressions contain window functions or aggregates:
+      // if those were collapsed, the functions could be recalculated, causing
+      // inefficiency with separate group-by clauses
+      case ex if !ex.deterministic => ex
+      case ex: UserDefinedExpression => ex
+    }.nonEmpty)) {
+      None
+    } else {
+      val remappedNewProjListResult = Try {
+        newProjList.map {
+          case attr: AttributeReference => projList.find(
+            _.toAttribute.canonicalized == attr.canonicalized).map {
+            case al: Alias =>
+              if (attr.name == al.name) {
+                transferMetadata(attr, al)
+              } else {
+                // Handle a case change (upper/lower) introduced via toSchema that
+                // results in a rename
+                transferMetadata(attr, al.copy(name = attr.name)(
+                  exprId = al.exprId, qualifier = al.qualifier,
+                  explicitMetadata = al.explicitMetadata,
+                  nonInheritableMetadataKeys = al.nonInheritableMetadataKeys))
+              }
+
+            case _: AttributeReference => attr
+          }.getOrElse(attr)
+
+          case anyOtherExpr =>
+            (anyOtherExpr transformUp {
+              case attr: AttributeReference =>
+                attribsRemappedInProj.get(attr).orElse(projList.find(
+                  _.toAttribute.canonicalized == attr.canonicalized).map {
+                  case al: Alias => al.child
+                  case _ => attr
+                }).getOrElse(attr)
+            }).asInstanceOf[NamedExpression]
+        }
+      }
+      remappedNewProjListResult match {
+        case Success(remappedNewProjList) =>
+          val newProject = Project(remappedNewProjList, child)
+          val droppedNamedExprs = projList.filter(ne =>
+            remappedNewProjList.forall(_.toAttribute != ne.toAttribute))
+          val prevDroppedColsPart1 = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS).
+ getOrElse(Seq.empty) + // remove any attribs which have been added back in the new project list + val prevDroppedColsPart2 = prevDroppedColsPart1.filterNot(x => + remappedNewProjList.exists(y => y.toAttribute == x.toAttribute || y.name == x.name)) + val prevDroppedColsFinal = prevDroppedColsPart2.filterNot(x => + droppedNamedExprs.exists(y => y == x || y.name == x.name)) + val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal + newProject.copyTagsFrom(p) + if (newDroppedList.nonEmpty) { + newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList) + } + Option(newProject) + + case Failure(x) => if (Utils.isTesting) { + throw x + } else { + None + } + } + } + } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out index d01e2b7403ae..11887c5811c1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out @@ -89,22 +89,21 @@ Project [k#x, v1#x, v2#x] -- !query SELECT * FROM nt1 natural join nt2 where k = "one" -- !query analysis -Project [k#x, v1#x, v2#x] -+- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Filter (k#x = one) ++- Project [k#x, v1#x, v2#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -191,22 +190,21 @@ Project [k#x] -- !query SELECT k FROM nt1 natural join nt2 where k = "one" -- !query analysis -Project [k#x] -+- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Filter (k#x = one) ++- Project [k#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -394,22 +392,21 @@ Sort [key#x ASC NULLS FIRST], true -- !query SELECT nt1.k, nt2.k FROM nt1 natural join nt2 where k = "one" -- !query analysis -Project [k#x, k#x] -+- Filter (k#x = one) - +- Project [k#x, v1#x, 
v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Filter (k#x = one) ++- Project [k#x, k#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index 497f36ff5644..a294bf73481b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -621,28 +621,26 @@ Sort [i#x ASC NULLS FIRST, k#x ASC NULLS FIRST, t#x ASC NULLS FIRST], true SELECT '' AS `xxx`, * FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (k = 1) -- !query analysis -Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Filter (k#x = 1) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet +Filter (k#x = 1) ++- Project [ AS xxx#x, i#x, j#x, t#x, k#x] + +- Join LeftOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query SELECT '' AS `xxx`, * FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1) -- !query analysis -Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Filter (i#x = 1) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet +Filter (i#x = 1) ++- Project [ AS xxx#x, i#x, j#x, t#x, k#x] + +- Join LeftOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out index e43dc13177f5..9a77fbff8e64 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out @@ -36,22 +36,21 @@ CreateViewCommand `nt2`, select * from values -- !query SELECT * FROM nt1 natural join nt2 where udf(k) = "one" -- !query analysis -Project [k#x, v1#x, v2#x] -+- 
Filter (cast(udf(cast(k#x as string)) as string) = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x,v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x,v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Filter (cast(udf(cast(k#x as string)) as string) = one) ++- Project [k#x, v1#x, v2#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x,v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x,v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query From e1e2367ccccc5d074c96915656cc981debb9feb0 Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 20 Dec 2023 19:57:37 -0800 Subject: [PATCH 104/129] SPARK-45959. reworked and simplified the code. Instead of Collapsing before analyse, now collapsing after analyze --- .../spark/sql/internal/EarlyCollapseProject.scala | 12 ++++++++++-- .../execution/python/ExtractPythonUDFsSuite.scala | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala index 568951b3b485..4e816c415202 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.internal import scala.collection.mutable import scala.util.{Failure, Success, Try} +import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, UnaryNode, Window} import org.apache.spark.sql.types.{Metadata, MetadataBuilder} @@ -31,7 +32,7 @@ private[sql] object EarlyCollapseProject { logicalPlan match { case newP @ Project(newProjList, p @ Project(projList, child)) if checkEarlyCollapsePossible(p, newP, child) => - collapseProjectEarly(newProjList, p, projList, child) + collapseProjectEarly(newP, newProjList, p, projList, child) case newP @ Project(newProjList, f @ Filter(_, filterChild: UnaryNode)) => // check if its case of nested filters followed by project @@ -57,7 +58,7 @@ private[sql] object EarlyCollapseProject { val p = projectAtEnd.get val child = p.child if (checkEarlyCollapsePossible(p, newP, child)) { - val newProjOpt = collapseProjectEarly(newProjList, p, p.projectList, child) + val newProjOpt = collapseProjectEarly(newP, newProjList, p, p.projectList, child) newProjOpt.map(collapsedProj => { val lastFilterMod = filterNodes.last.copy(child = collapsedProj) filterNodes.dropRight(1).foldRight(lastFilterMod)((f, c) => f.copy(child = c)) @@ -95,6 +96,7 @@ private[sql] object EarlyCollapseProject { } def collapseProjectEarly( + newP: Project, newProjList: Seq[NamedExpression], p: Project, projList: Seq[NamedExpression], @@ -174,6 +176,12 @@ 
private[sql] object EarlyCollapseProject { droppedNamedExprs.exists(y => y == x || y.name == x.name)) val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal newProject.copyTagsFrom(p) + // remove the datasetId copied from current P due to above copy + newProject.unsetTagValue(Dataset.DATASET_ID_TAG) + // use the dataset id of the incoming new project + newP.getTagValue(Dataset.DATASET_ID_TAG).foreach(map => + newProject.setTagValue(Dataset.DATASET_ID_TAG, map.clone())) + newProject.unsetTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS) if (newDroppedList.nonEmpty) { newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala index cce9e0b5cc15..0ab8691801d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala @@ -186,7 +186,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { .withColumn("d", col("c")) val pythonEvalNodes4 = collectBatchExec(df4.queryExecution.executedPlan) assert(pythonEvalNodes4.size == 1) - assert(pythonEvalNodes4.head.udfs.size == 2) + assert(pythonEvalNodes4.head.udfs.size == 1) val df5 = df.withColumns(Seq("c", "d"), Seq(batchedNondeterministicPythonUDF(col("a")), batchedNondeterministicPythonUDF(col("a")))) From 40ac4a42593cc4def236dcea41636930e7021a10 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 26 Dec 2023 13:34:05 -0800 Subject: [PATCH 105/129] SPARK-45959.Made EarlyCollapseProject an analyzer rule --- .../sql/catalyst/analysis/Analyzer.scala | 2 + .../spark/sql/execution/QueryExecution.scala | 8 +-- .../analysis}/EarlyCollapseProject.scala | 50 ++++++++++--------- .../internal/BaseSessionStateBuilder.scala | 5 +- .../sql/hive/HiveSessionStateBuilder.scala | 5 +- 5 files changed, 37 insertions(+), 33 deletions(-) rename sql/core/src/main/scala/org/apache/spark/sql/{internal => execution/analysis}/EarlyCollapseProject.scala (86%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 94f6d3346265..ccd4a38d3b16 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -351,6 +351,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor Batch("HandleSpecialCommand", Once, HandleSpecialCommand), Batch("Remove watermark for batch query", Once, + EliminateEventTimeWatermark), + Batch("Remove watermark for batch query", Once, EliminateEventTimeWatermark) ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index cb9b496e103e..eb5b38d42881 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters import org.apache.spark.sql.execution.exchange.EnsureRequirements import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery import 
org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata, WatermarkPropagator} -import org.apache.spark.sql.internal.{EarlyCollapseProject, SQLConf} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils @@ -85,11 +85,7 @@ class QueryExecution( lazy val analyzed: LogicalPlan = { val plan = executePhase(QueryPlanningTracker.ANALYSIS) { // We can't clone `logical` here, which will reset the `_analyzed` flag. - val analyzedPlan = sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) - analyzedPlan match { - case EarlyCollapseProject(collapsedPlan) => collapsedPlan - case _ => analyzedPlan - } + sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) } tracker.setAnalyzed(plan) plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala similarity index 86% rename from sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala index 4e816c415202..3d6d348fd22d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala @@ -15,36 +15,38 @@ * limitations under the License. */ -package org.apache.spark.sql.internal +package org.apache.spark.sql.execution.analysis import scala.collection.mutable import scala.util.{Failure, Success, Try} import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, UnaryNode, Window} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.{Metadata, MetadataBuilder} import org.apache.spark.util.Utils -private[sql] object EarlyCollapseProject { - def unapply(logicalPlan: LogicalPlan): Option[LogicalPlan] = +private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { + + def apply(logicalPlan: LogicalPlan): LogicalPlan = logicalPlan match { - case newP @ Project(newProjList, p @ Project(projList, child)) + case newP@Project(newProjList, p@Project(projList, child)) if checkEarlyCollapsePossible(p, newP, child) => - collapseProjectEarly(newP, newProjList, p, projList, child) + collapseProjectEarly(newP, newProjList, p, projList, child) getOrElse newP - case newP @ Project(newProjList, f @ Filter(_, filterChild: UnaryNode)) => + case newP@Project(newProjList, f@Filter(_, filterChild: UnaryNode)) => // check if its case of nested filters followed by project val filterNodes = mutable.ListBuffer(f) var projectAtEnd: Option[Project] = None var keepGoing = true var currentChild = filterChild - while(keepGoing) { + while (keepGoing) { currentChild match { case p: Project => projectAtEnd = Option(p) keepGoing = false - case filter @ Filter(_, u: UnaryNode) => + case filter@Filter(_, u: UnaryNode) => filterNodes += filter currentChild = u case _ => keepGoing = false @@ -62,15 +64,17 @@ private[sql] object EarlyCollapseProject { newProjOpt.map(collapsedProj => { val lastFilterMod = filterNodes.last.copy(child = 
collapsedProj) filterNodes.dropRight(1).foldRight(lastFilterMod)((f, c) => f.copy(child = c)) - }) + }).getOrElse { + newP + } } else { - None + newP } } else { - None + newP } - case _ => None + case _ => logicalPlan } private def checkEarlyCollapsePossible(p: Project, newP: Project, child: LogicalPlan): Boolean = @@ -87,20 +91,20 @@ private[sql] object EarlyCollapseProject { val newMdBuilder = new MetadataBuilder().withMetadata(from.metadata) val newMd = newMdBuilder.build() al.copy()(exprId = al.exprId, qualifier = from.qualifier, - nonInheritableMetadataKeys = al.nonInheritableMetadataKeys, - explicitMetadata = Option(newMd)) + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys, + explicitMetadata = Option(newMd)) - case attr: AttributeReference => attr.copy(metadata = from.metadata)( - exprId = attr.exprId, qualifier = from.qualifier) + case attr: AttributeReference => attr.copy(metadata = from.metadata)( + exprId = attr.exprId, qualifier = from.qualifier) + } } - } def collapseProjectEarly( - newP: Project, - newProjList: Seq[NamedExpression], - p: Project, - projList: Seq[NamedExpression], - child: LogicalPlan): Option[Project] = { + newP: Project, + newProjList: Seq[NamedExpression], + p: Project, + projList: Seq[NamedExpression], + child: LogicalPlan): Option[Project] = { // In the new column list identify those Named Expressions which are just attributes and // hence pass thru val (_, tinkeredOrNewNamedExprs) = newProjList.partition { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 00c72294ca07..b3dc6dd9030e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.{ColumnarRule, CommandExecutionMode, QueryExecution, SparkOptimizer, SparkPlanner, SparkSqlParser} import org.apache.spark.sql.execution.adaptive.AdaptiveRulesHolder import org.apache.spark.sql.execution.aggregate.{ResolveEncodersInScalaAgg, ScalaUDAF} -import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin +import org.apache.spark.sql.execution.analysis.{DetectAmbiguousSelfJoin, EarlyCollapseProject} import org.apache.spark.sql.execution.command.CommandCheck import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.{TableCapabilityCheck, V2SessionCatalog} @@ -208,7 +208,8 @@ abstract class BaseSessionStateBuilder( customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = - DetectAmbiguousSelfJoin +: + EarlyCollapseProject +: + DetectAmbiguousSelfJoin +: QualifyLocationWithWarehouse(catalog) +: PreprocessTableCreation(catalog) +: PreprocessTableInsertion +: diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 32100d060b09..5be17f416051 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.SparkPlanner import 
org.apache.spark.sql.execution.aggregate.ResolveEncodersInScalaAgg -import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin +import org.apache.spark.sql.execution.analysis.{DetectAmbiguousSelfJoin, EarlyCollapseProject} import org.apache.spark.sql.execution.command.CommandCheck import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck @@ -97,7 +97,8 @@ class HiveSessionStateBuilder( customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = - DetectAmbiguousSelfJoin +: + EarlyCollapseProject +: + DetectAmbiguousSelfJoin +: RelationConversions(catalog) +: QualifyLocationWithWarehouse(catalog) +: PreprocessTableCreation(catalog) +: From 9a266d64da3a5052c37aa18b95d3326e63f9b3ed Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 26 Dec 2023 14:43:49 -0800 Subject: [PATCH 106/129] SPARK-45959.Made EarlyCollapseProject an analyzer rule --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 6 +++++- .../spark/sql/internal/BaseSessionStateBuilder.scala | 8 ++++++-- .../apache/spark/sql/hive/HiveSessionStateBuilder.scala | 6 ++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ccd4a38d3b16..3393c03a6986 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -212,7 +212,9 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor AnalysisHelper.markInAnalyzer { val analyzed = executeAndTrack(plan, tracker) checkAnalysis(analyzed) - analyzed + postAnalysisEarlyOptimizationRules.foldLeft(analyzed) { + case(rs, rule) => rule(rs) + } } } @@ -248,6 +250,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor */ val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil + val postAnalysisEarlyOptimizationRules: Seq[Rule[LogicalPlan]] = Nil + private def typeCoercionRules(): List[Rule[LogicalPlan]] = if (conf.ansiEnabled) { AnsiTypeCoercion.typeCoercionRules } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index b3dc6dd9030e..dd8c6678df12 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -208,8 +208,7 @@ abstract class BaseSessionStateBuilder( customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = - EarlyCollapseProject +: - DetectAmbiguousSelfJoin +: + DetectAmbiguousSelfJoin +: QualifyLocationWithWarehouse(catalog) +: PreprocessTableCreation(catalog) +: PreprocessTableInsertion +: @@ -218,6 +217,9 @@ abstract class BaseSessionStateBuilder( ReplaceCharWithVarchar +: customPostHocResolutionRules + override val postAnalysisEarlyOptimizationRules: Seq[Rule[LogicalPlan]] = + EarlyCollapseProject +: Nil + override val extendedCheckRules: Seq[LogicalPlan => Unit] = PreWriteCheck +: PreReadCheck +: @@ -225,6 +227,8 @@ abstract class BaseSessionStateBuilder( TableCapabilityCheck +: CommandCheck +: customCheckRules + + } /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
index 5be17f416051..0f05bbd41931 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
@@ -97,8 +97,7 @@ class HiveSessionStateBuilder(
       customResolutionRules
 
   override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
-    EarlyCollapseProject +:
-      DetectAmbiguousSelfJoin +:
+    DetectAmbiguousSelfJoin +:
       RelationConversions(catalog) +:
       QualifyLocationWithWarehouse(catalog) +:
       PreprocessTableCreation(catalog) +:
@@ -109,6 +108,9 @@ class HiveSessionStateBuilder(
       ReplaceCharWithVarchar +:
       customPostHocResolutionRules
 
+  override val postAnalysisEarlyOptimizationRules: Seq[Rule[LogicalPlan]] =
+    EarlyCollapseProject +: Nil
+
   override val extendedCheckRules: Seq[LogicalPlan => Unit] =
     PreWriteCheck +:
       PreReadCheck +:

From b35e1997bc4feea8f51eda6b87eb48f60a04dfe3 Mon Sep 17 00:00:00 2001
From: ashahid
Date: Wed, 27 Dec 2023 18:01:08 -0800
Subject: [PATCH 107/129] SPARK-45959. Added new tests and a bug fix.

---
 .../spark/sql/execution/CacheManager.scala    | 75 ++++++++++++-------
 .../spark/sql/EarlyCollapseProjectSuite.scala | 31 ++++++++
 2 files changed, 79 insertions(+), 27 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
index 475fec0ef87f..37d93d439c28 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
@@ -334,7 +334,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
     val canonicalizedCdProj = cdPlanProject.canonicalized.asInstanceOf[Project]
     // matchIndexInCdPlanProj remains -1 in the end; it indicates it is
     // new cols created out of existing output attribs
-    val (incomingToCachedPlanIndxMapping, inComingProjNoDirectMapping) =
+    val (directlyMappedincomingToCachedPlanIndx, inComingProjNoDirectMapping) =
      canonicalizedInProj.projectList.zipWithIndex.map {
        case (inComingNE, index) =>
          // first check for equivalent named expressions. If index is != -1, that means
@@ -373,27 +373,53 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
       // of the incoming projection, so we have to handle that
       val unusedAttribsOfCDPlanToGenIncomingAttr =
         cdPlanProject.projectList.indices.filterNot(i =>
-          incomingToCachedPlanIndxMapping.exists(_._2 == i)).map(i => {
+          directlyMappedincomingToCachedPlanIndx.exists(_._2 == i)).map(i => {
           val cdAttrib = cdPlanProject.projectList(i)
           i -> AttributeReference(cdAttrib.name, cdAttrib.dataType, cdAttrib.nullable,
             cdAttrib.metadata)(qualifier = cdAttrib.qualifier)
         })
-
+      // Because, in the case of a rename, multiple incoming named exprs (attributes
+      // or aliases) will point to a common cdPlan attrib, we need to ensure they do
+      // not create separate attributes in the modified Project for the incoming plan;
+      // that is, a single attribute ref must be present in all mixes of rename and
+      // pass-through attributes.
+      // So we use the first attribute ref in the incoming directly mapped project,
+      // or, if no attrib exists (only possible for a pure rename), we pick the child
+      // expr, which is bound to be an attribute, as the common ref.
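+      // e.g. an incoming df.select($"a", $"a" as "a2") maps both output columns to the
+      // cached plan's single attribute `a`, so both must share one attribute reference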
+ val cdAttribToCommonAttribForIncmngNe = directlyMappedincomingToCachedPlanIndx.map { + case (inAttribIndex, cdAttribIndex) => + cdPlanProject.projectList(cdAttribIndex).toAttribute -> + incomingProject.projectList(inAttribIndex) + }.groupBy(_._1).map { + case (cdAttr, incomngSeq) => + val incmngCommonAttrib = incomngSeq.map(_._2).flatMap { + case attr: Attribute => Seq(attr) + case Alias(attr: Attribute, _) => Seq(attr) + case _ => Seq.empty + }.headOption.getOrElse( + AttributeReference(cdAttr.name, cdAttr.dataType, cdAttr.nullable)()) + cdAttr -> incmngCommonAttrib + } // If expressions of inComingProjNoDirectMapping can be expressed in terms of the // incoming attribute refs or incoming alias exprs, which can be mapped directly // to the CachedPlan's output, we are good. so lets transform such indirectly // mappable named expressions in terms of mappable attributes of the incoming plan - val directlyMappedIncomingProjs = incomingToCachedPlanIndxMapping.map { - case(incmngIndex, _) => incomingProject.projectList(incmngIndex) - } + val transformedIndirectlyMappableExpr = inComingProjNoDirectMapping.map { case (incomngIndex, _) => - val ne = incomingProject.projectList(incomngIndex) - val modifiedNe = ne.transformDown { - case expr => directlyMappedIncomingProjs.find(ne => ne.toAttribute == expr - || ne.children.headOption.contains(expr)).orElse( + val indirectIncmnNe = incomingProject.projectList(incomngIndex) + val modifiedNe = indirectIncmnNe.transformDown { + case expr => directlyMappedincomingToCachedPlanIndx.find { + case(incomingIndex, _) => + val directMappedNe = incomingProject.projectList(incomingIndex) + directMappedNe.toAttribute == expr || + directMappedNe.children.headOption.contains(expr)}.map { + case (_, cdIndex) => + val cdAttrib = cdPlanProject.projectList(cdIndex).toAttribute + cdAttribToCommonAttribForIncmngNe(cdAttrib) + }.orElse( unusedAttribsOfCDPlanToGenIncomingAttr.find { case(i, _) => val cdNe = canonicalizedCdProj.projectList(i) cdNe.children.headOption.contains(expr.canonicalized) @@ -403,30 +429,25 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { incomngIndex -> modifiedNe }.toMap - val cdAttribToInAttrib = incomingToCachedPlanIndxMapping.map { - case (inAttribIndex, cdAttribIndex) => - cdPlanProject.projectList(cdAttribIndex).toAttribute -> - incomingProject.projectList(inAttribIndex).toAttribute - }.toMap - if (transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { val projectionToForceOnCdPlan = cachedPlan.output.zipWithIndex.map { case (cdAttr, i) => - cdAttribToInAttrib.getOrElse(cdAttr, + cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) } val modifiedInProj = incomingProject.projectList.zipWithIndex.map { - case (ne, indx) => if (incomingToCachedPlanIndxMapping.exists(_._1 == indx)) { - ne.toAttribute - } else { - transformedIndirectlyMappableExpr(indx).transformUp { - case Replaceable(attribToUse) => attribToUse - }.asInstanceOf[NamedExpression] + case (ne, indx) => + if (directlyMappedincomingToCachedPlanIndx.exists(_._1 == indx)) { + ne + } else { + transformedIndirectlyMappableExpr(indx).transformUp { + case Replaceable(attribToUse) => attribToUse + }.asInstanceOf[NamedExpression] + } } - } val newPartialPlan = Project(modifiedInProj, cd.cachedRepresentation.toOption. 
- get.withOutput(projectionToForceOnCdPlan)) + get.withOutput(projectionToForceOnCdPlan)) partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) foundMatch = true } @@ -565,6 +586,6 @@ private case class Replaceable(attribToUse: Attribute) extends LeafExpression { } object CacheManager { - val inMemoryRelationExtractor = (plan: LogicalPlan) => plan.collectLeaves().head. - asInstanceOf[InMemoryRelation] + val inMemoryRelationExtractor: LogicalPlan => InMemoryRelation = + plan => plan.collectLeaves().head.asInstanceOf[InMemoryRelation] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index a6f5d9d4bb2f..38bd260d666f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -165,6 +165,37 @@ class EarlyCollapseProjectSuite extends QueryTest checkAnswer(newDfOpt, newDfUnopt) } + test("mix of column addition, rename and dropping") { + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.select($"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", + $"a" as "renameCola", $"c" * $"d" as "c", $"a")) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) + } + + test("reuse of cache on mix of column addition, rename and dropping") { + val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + testDf.cache() + val initNodes = collectNodes(testDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + df => df.select($"c" * $"d" as "c", $"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", + $"a" as "renameCola", $"a")) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + assert(initNodes.size === optDfNodes.size) + assert(nonOptDfNodes.size === optDfNodes.size + 1) + checkAnswer(newDfOpt, newDfUnopt) + assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. + isInstanceOf[InMemoryRelation]) + } + test("use of cached InMemoryRelation when new columns added do not result in new project -1") { val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") From 4c4d052ba2da15647ca8912af033bd663b2eb22c Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 28 Dec 2023 13:45:08 -0800 Subject: [PATCH 108/129] SPARK-45959.added new tests/ refactored previously added tests and a bugfix --- .../spark/sql/execution/CacheManager.scala | 18 +- .../spark/sql/EarlyCollapseProjectSuite.scala | 251 +++++++----------- 2 files changed, 109 insertions(+), 160 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 37d93d439c28..31806595cc83 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -438,13 +438,23 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val modifiedInProj = incomingProject.projectList.zipWithIndex.map { case (ne, indx) => - if (directlyMappedincomingToCachedPlanIndx.exists(_._1 == indx)) { - ne - } else { + directlyMappedincomingToCachedPlanIndx.find(_._1 == indx).map { + case (_, cdIndex) => + ne match { + case attr: Attribute => attr + case al: Alias => + val cdAttr = cdPlanProject.projectList(cdIndex).toAttribute + al.copy(child = cdAttribToCommonAttribForIncmngNe(cdAttr))( + exprId = al.exprId, qualifier = al.qualifier, + explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys + ) + } + }.getOrElse({ transformedIndirectlyMappableExpr(indx).transformUp { case Replaceable(attribToUse) => attribToUse }.asInstanceOf[NamedExpression] - } + }) } val newPartialPlan = Project(modifiedInProj, cd.cachedRepresentation.toOption. get.withOutput(projectionToForceOnCdPlan)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 38bd260d666f..7d1f5f868ef0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -29,242 +29,181 @@ class EarlyCollapseProjectSuite extends QueryTest import testImplicits._ test("withColumns: check no new project addition for simple columns addition") { - val testDf = spark.range(20).select($"id" as "a", $"id" as "b") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2))) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + val baseDf = spark.range(20).select($"id" as "a", $"id" as "b") + checkProjectCollapseAndCacheUse(baseDf, + df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2)), + checkWithBaseDfCache = false) } test("withColumns: check no new project addition if redefined alias is not used in" + " new columns") { - val testDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + val baseDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2))) - val optDfNodes = 
collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + + checkProjectCollapseAndCacheUse(baseDf, + df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2)), checkWithBaseDfCache = false) } test("withColumns: no new project addition if redefined alias is used in new columns - 1") { - val testDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", + val baseDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", $"b") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2))) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + + checkProjectCollapseAndCacheUse(baseDf, + df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2)), checkWithBaseDfCache = false) } test("withColumns: no new project addition if redefined alias is used in new columns - 2") { - val testDf = spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + checkProjectCollapseAndCacheUse(baseDf, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), + checkWithBaseDfCache = false) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is not used in other cols") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumnRenamed("a", "c")) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b") + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "c"), + checkWithBaseDfCache = false) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is an attribute used in other cols") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumnRenamed("a", "d")) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "d"), + checkWithBaseDfCache = false) } test("withColumnRenamed: remap of column should not result in new project if the remap" + " is on an alias") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumnRenamed("d", "x")) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x"), + checkWithBaseDfCache = false) } test("withColumnRenamed: remap of column should not result in new project if the remap" + " source an alias and that attribute is also projected as another attribute") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). select($"c", $"a", $"b", $"d", $"d" as "k") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumnRenamed("d", "x")) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x"), + checkWithBaseDfCache = false) } test("withColumnRenamed: test multi column remap") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u"))) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + checkProjectCollapseAndCacheUse(baseDf, + df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u")), + checkWithBaseDfCache = false) } test("withColumns: test multi column addition") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). 
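+      // alias `d` below is derived from alias `c`, so renames must remap aliases, not
+      // just plain attributes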
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), - Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))), + checkWithBaseDfCache = false) } test("mix of column addition, rename and dropping") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + checkProjectCollapseAndCacheUse(baseDf, df => df.select($"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", - $"a" as "renameCola", $"c" * $"d" as "c", $"a")) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) + $"a" as "renameCola", $"c" * $"d" as "c", $"a"), checkWithBaseDfCache = false) } - test("reuse of cache on mix of column addition, rename and dropping") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + test("reuse of cache on mix of column addition, rename and dropping - 1") { + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - testDf.cache() - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + checkProjectCollapseAndCacheUse(baseDf, df => df.select($"c" * $"d" as "c", $"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", - $"a" as "renameCola", $"a")) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) - assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. - isInstanceOf[InMemoryRelation]) + $"a" as "renameCola", $"a"), checkWithBaseDfCache = true) + } + + test("reuse of cache on mix of column addition, rename and dropping - 2") { + val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseAndCacheUse(baseDf, + df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA"), + checkWithBaseDfCache = true) + } + + test("reuse of cache on mix of column addition, rename and dropping - 3") { + val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). 
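+      // three stacked Projects; `d` below combines alias `c` with attributes `a` and `b`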
+ select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseAndCacheUse(baseDf, + df => df.select($"d" * $"a" as "d", $"b" as "renameB", $"a" * $"d" as "renameA", + $"a" as "renameColA"), checkWithBaseDfCache = true) + } + + test("reuse of cache on mix of column addition, rename and dropping - 4") { + val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseAndCacheUse(baseDf, df => df.select($"c"), checkWithBaseDfCache = true) } test("use of cached InMemoryRelation when new columns added do not result in new project -1") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - testDf.cache() - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")) - )) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) - assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. - isInstanceOf[InMemoryRelation]) + ), checkWithBaseDfCache = true) } test("use of cached InMemoryRelation when new columns added do not result in new project -2") { - val testDf = spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - testDf.cache() - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) - assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. - isInstanceOf[InMemoryRelation]) + checkProjectCollapseAndCacheUse(baseDf, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), + checkWithBaseDfCache = true) } test("use of cached InMemoryRelation when new columns added do not result in new project, with" + "positions changed") { - val testDf = spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - testDf.cache() - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b")) - val optDfNodes = collectNodes(newDfOpt) - val nonOptDfNodes = collectNodes(newDfUnopt) - assert(initNodes.size === optDfNodes.size) - assert(nonOptDfNodes.size === optDfNodes.size + 1) - checkAnswer(newDfOpt, newDfUnopt) - assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. 
- isInstanceOf[InMemoryRelation]) + checkProjectCollapseAndCacheUse(baseDf, + df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b"), + checkWithBaseDfCache = true) } test("use of cached InMemoryRelation when renamed columns do not result in new project") { - val testDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - testDf.cache() - val initNodes = collectNodes(testDf) - val (newDfOpt, newDfUnopt) = getComparableDataFrames(testDf, - df => df.withColumnsRenamed( - Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1"))) + + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnsRenamed( + Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1")), checkWithBaseDfCache = true) + } + + private def checkProjectCollapseAndCacheUse( + baseDf: DataFrame, + testExec: DataFrame => DataFrame, + checkWithBaseDfCache: Boolean): Unit = { + if (checkWithBaseDfCache) { + baseDf.cache() + } + val initNodes = collectNodes(baseDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(baseDf, testExec) val optDfNodes = collectNodes(newDfOpt) val nonOptDfNodes = collectNodes(newDfUnopt) assert(initNodes.size === optDfNodes.size) assert(nonOptDfNodes.size === optDfNodes.size + 1) checkAnswer(newDfOpt, newDfUnopt) - assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. - isInstanceOf[InMemoryRelation]) + if (checkWithBaseDfCache) { + assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. + isInstanceOf[InMemoryRelation]) + } } private def getComparableDataFrames( From b5765c93a4f754b36f1616166e7bb45efc662ded Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 28 Dec 2023 13:54:05 -0800 Subject: [PATCH 109/129] SPARK-45959.refactored previously added tests to run in cache mode and without caching --- .../spark/sql/EarlyCollapseProjectSuite.scala | 59 ++++++++----------- ...EarlyCollapseProjectWithCachingSuite.scala | 22 +++++++ 2 files changed, 45 insertions(+), 36 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 7d1f5f868ef0..1b2c9648e79b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -27,12 +27,11 @@ import org.apache.spark.sql.test.SharedSparkSession class EarlyCollapseProjectSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ - + val useCaching: Boolean = false test("withColumns: check no new project addition for simple columns addition") { val baseDf = spark.range(20).select($"id" as "a", $"id" as "b") checkProjectCollapseAndCacheUse(baseDf, - df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2)), - checkWithBaseDfCache = false) + df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2))) } test("withColumns: check no new project addition if redefined alias is not used in" + @@ -41,7 +40,7 @@ class EarlyCollapseProjectSuite extends QueryTest $"b") checkProjectCollapseAndCacheUse(baseDf, - df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2)), checkWithBaseDfCache = false) + df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2))) } 
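+  // The tests below redefine an alias in the outer select; the collapse must substitute
+  // the alias's child expression rather than the original attribute reference.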
test("withColumns: no new project addition if redefined alias is used in new columns - 1") { @@ -49,7 +48,7 @@ class EarlyCollapseProjectSuite extends QueryTest $"b") checkProjectCollapseAndCacheUse(baseDf, - df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2)), checkWithBaseDfCache = false) + df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2))) } test("withColumns: no new project addition if redefined alias is used in new columns - 2") { @@ -57,31 +56,27 @@ class EarlyCollapseProjectSuite extends QueryTest select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") checkProjectCollapseAndCacheUse(baseDf, - df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), - checkWithBaseDfCache = false) + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is not used in other cols") { val baseDf = spark.range(10).select($"id" as "a", $"id" as "b") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "c"), - checkWithBaseDfCache = false) + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "c")) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is an attribute used in other cols") { val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "d"), - checkWithBaseDfCache = false) + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "d")) } test("withColumnRenamed: remap of column should not result in new project if the remap" + " is on an alias") { val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x"), - checkWithBaseDfCache = false) + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x")) } test("withColumnRenamed: remap of column should not result in new project if the remap" + @@ -89,16 +84,14 @@ class EarlyCollapseProjectSuite extends QueryTest val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). select($"c", $"a", $"b", $"d", $"d" as "k") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x"), - checkWithBaseDfCache = false) + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x")) } test("withColumnRenamed: test multi column remap") { val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") checkProjectCollapseAndCacheUse(baseDf, - df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u")), - checkWithBaseDfCache = false) + df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u"))) } test("withColumns: test multi column addition") { @@ -107,8 +100,7 @@ class EarlyCollapseProjectSuite extends QueryTest checkProjectCollapseAndCacheUse(baseDf, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), - Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))), - checkWithBaseDfCache = false) + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) } test("mix of column addition, rename and dropping") { @@ -116,7 +108,7 @@ class EarlyCollapseProjectSuite extends QueryTest select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") checkProjectCollapseAndCacheUse(baseDf, df => df.select($"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", - $"a" as "renameCola", $"c" * $"d" as "c", $"a"), checkWithBaseDfCache = false) + $"a" as "renameCola", $"c" * $"d" as "c", $"a")) } test("reuse of cache on mix of column addition, rename and dropping - 1") { @@ -124,15 +116,14 @@ class EarlyCollapseProjectSuite extends QueryTest select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") checkProjectCollapseAndCacheUse(baseDf, df => df.select($"c" * $"d" as "c", $"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", - $"a" as "renameCola", $"a"), checkWithBaseDfCache = true) + $"a" as "renameCola", $"a")) } test("reuse of cache on mix of column addition, rename and dropping - 2") { val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") checkProjectCollapseAndCacheUse(baseDf, - df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA"), - checkWithBaseDfCache = true) + df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA")) } test("reuse of cache on mix of column addition, rename and dropping - 3") { @@ -140,13 +131,13 @@ class EarlyCollapseProjectSuite extends QueryTest select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") checkProjectCollapseAndCacheUse(baseDf, df => df.select($"d" * $"a" as "d", $"b" as "renameB", $"a" * $"d" as "renameA", - $"a" as "renameColA"), checkWithBaseDfCache = true) + $"a" as "renameColA")) } test("reuse of cache on mix of column addition, rename and dropping - 4") { val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). 
select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDf, df => df.select($"c"), checkWithBaseDfCache = true) + checkProjectCollapseAndCacheUse(baseDf, df => df.select($"c")) } test("use of cached InMemoryRelation when new columns added do not result in new project -1") { @@ -155,8 +146,7 @@ class EarlyCollapseProjectSuite extends QueryTest checkProjectCollapseAndCacheUse(baseDf, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), - Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")) - ), checkWithBaseDfCache = true) + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) } test("use of cached InMemoryRelation when new columns added do not result in new project -2") { @@ -164,8 +154,7 @@ class EarlyCollapseProjectSuite extends QueryTest select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") checkProjectCollapseAndCacheUse(baseDf, - df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), - checkWithBaseDfCache = true) + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) } test("use of cached InMemoryRelation when new columns added do not result in new project, with" + @@ -174,8 +163,7 @@ class EarlyCollapseProjectSuite extends QueryTest select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") checkProjectCollapseAndCacheUse(baseDf, - df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b"), - checkWithBaseDfCache = true) + df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b")) } test("use of cached InMemoryRelation when renamed columns do not result in new project") { @@ -183,14 +171,13 @@ class EarlyCollapseProjectSuite extends QueryTest select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnsRenamed( - Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1")), checkWithBaseDfCache = true) + Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1"))) } private def checkProjectCollapseAndCacheUse( baseDf: DataFrame, - testExec: DataFrame => DataFrame, - checkWithBaseDfCache: Boolean): Unit = { - if (checkWithBaseDfCache) { + testExec: DataFrame => DataFrame): Unit = { + if (useCaching) { baseDf.cache() } val initNodes = collectNodes(baseDf) @@ -200,7 +187,7 @@ class EarlyCollapseProjectSuite extends QueryTest assert(initNodes.size === optDfNodes.size) assert(nonOptDfNodes.size === optDfNodes.size + 1) checkAnswer(newDfOpt, newDfUnopt) - if (checkWithBaseDfCache) { + if (useCaching) { assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. isInstanceOf[InMemoryRelation]) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala new file mode 100644 index 000000000000..218bf11bf7d8 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite {
+ override val useCaching: Boolean = true
+}

From 7c9415b2b2d5ae18b808bef6806a6672f18b3f9d Mon Sep 17 00:00:00 2001
From: ashahid
Date: Thu, 28 Dec 2023 17:48:42 -0800
Subject: [PATCH 110/129] SPARK-45959. added more tests and a bug fix in canonicalization logic

---
 .../spark/sql/execution/CacheManager.scala | 25 +++++++++++++------
 .../spark/sql/EarlyCollapseProjectSuite.scala | 6 +++++
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
index 31806595cc83..6fd5ae295d6f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
@@ -330,17 +330,24 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
 incomingPlan.isInstanceOf[Project]) {
 val incomingProject = incomingPlan.asInstanceOf[Project]
 val cdPlanProject = cachedPlan.asInstanceOf[Project]
- val canonicalizedInProj = incomingProject.canonicalized.asInstanceOf[Project]
- val canonicalizedCdProj = cdPlanProject.canonicalized.asInstanceOf[Project]
+ // Since the child of both the incoming and the cached plan is the same (that is
+ // why we are here), let's, for mapping and comparison purposes, canonicalize the
+ // cached plan's project list in terms of the incoming plan's child, so that the
+ // attributes can be mapped correctly.
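+ // For example (hypothetical attribute ids): if the cached Project's child outputs
+ // [a#1, b#2] while the incoming Project's child outputs [a#10, b#11], every reference
+ // to a#1/b#2 inside the cached project list is rewritten below to a#10/b#11, so the
+ // two project lists can be compared expression by expression.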
+ val cdPlanToIncomngPlanChildOutputMapping =
+ cdPlanProject.child.output.zip(incomingProject.child.output).toMap
+ val canonicalizedCdProjList = cdPlanProject.projectList.map(_.transformUp {
+ case attr: Attribute => cdPlanToIncomngPlanChildOutputMapping(attr)
+ }.asInstanceOf[NamedExpression])
 // if matchIndexInCdPlanProj remains -1 in the end, it indicates these are
 // new cols created out of existing output attribs
 val (directlyMappedincomingToCachedPlanIndx, inComingProjNoDirectMapping) =
- canonicalizedInProj.projectList.zipWithIndex.map {
+ incomingProject.projectList.zipWithIndex.map {
 case (inComingNE, index) =>
 // first check for equivalent named expressions; if index != -1, that means
 // it is a pass-thru Alias or a pass-thru Attribute
- var matchIndexInCdPlanProj =
- canonicalizedCdProj.projectList.indexWhere(_ == inComingNE)
+ var matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere(_ == inComingNE)
 if (matchIndexInCdPlanProj == -1) {
 // if match index is -1, there are two possibilities:
 // 1) it is a case of rename which means the incoming expr is an alias and
@@ -356,7 +363,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
 case x: AttributeReference => x
 case Alias(expr, _) => expr
 }
- matchIndexInCdPlanProj = canonicalizedCdProj.projectList.indexWhere {
+ matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere {
 case Alias(expr, _) => expr == incomingExprToCheck
 case x => x == incomingExprToCheck
 }
@@ -410,6 +417,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
 val transformedIndirectlyMappableExpr = inComingProjNoDirectMapping.map {
 case (incomngIndex, _) =>
 val indirectIncmnNe = incomingProject.projectList(incomngIndex)
+
 val modifiedNe = indirectIncmnNe.transformDown {
 case expr =>
 directlyMappedincomingToCachedPlanIndx.find {
 case(incomingIndex, _) =>
@@ -421,11 +429,12 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
 cdAttribToCommonAttribForIncmngNe(cdAttrib)
 }.orElse(
 unusedAttribsOfCDPlanToGenIncomingAttr.find {
- case(i, _) => val cdNe = canonicalizedCdProj.projectList(i)
- cdNe.children.headOption.contains(expr.canonicalized)
+ case(i, _) => val cdNe = canonicalizedCdProjList(i)
+ cdNe.children.headOption.contains(expr)
 }.map(_._2)).
 map(ne => Replaceable(ne.toAttribute)).getOrElse(expr)
 }.asInstanceOf[NamedExpression]
+
 incomngIndex -> modifiedNe
 }.toMap

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala
index 1b2c9648e79b..2ea74582e21e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala
@@ -140,6 +140,12 @@ class EarlyCollapseProjectSuite extends QueryTest
 checkProjectCollapseAndCacheUse(baseDf, df => df.select($"c"))
 }

+ test("reuse of cache on mix of column addition, rename and dropping - 5") {
+ val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b").
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") From 13b8675cea9a0d707a78c2dcc08a682f0ef417d1 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 29 Dec 2023 13:52:55 -0800 Subject: [PATCH 111/129] SPARK-45959. added a new test --- .../spark/sql/EarlyCollapseProjectSuite.scala | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 2ea74582e21e..953cae878e22 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryRelation -import org.apache.spark.sql.functions.col +import org.apache.spark.sql.functions.{col, lit} import org.apache.spark.sql.test.SharedSparkSession @@ -146,11 +146,25 @@ class EarlyCollapseProjectSuite extends QueryTest checkProjectCollapseAndCacheUse(baseDf, df => df.select($"d" * 7 as "a")) } + test("reuse of cache on mix of column addition, rename and dropping - 6") { + val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseAndCacheUse(baseDf, df => df.select($"d" * 7 as "a", $"d" * 7 as "b", + $"b" + $"a" as "e")) + } + + test("reuse of cache on mix of column addition, rename and dropping - 7") { + val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select( lit(9) as "e", $"c", lit(11) as "a", $"b", + $"c" * $"a" * $"b" as "d") + checkProjectCollapseAndCacheUse(baseDf, df => df.select($"a" as "a1", lit(7) as "d1", + $"b" as "b1", $"c" * $"a" as "c", lit(13) as "f")) + } + test("use of cached InMemoryRelation when new columns added do not result in new project -1") { val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDf, - df => df.withColumns( + checkProjectCollapseAndCacheUse(baseDf, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) } From d660dbb669d7143c875ff4d2cab3d097ac5132ae Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 2 Jan 2024 10:24:00 -0800 Subject: [PATCH 112/129] SPARK-45959. 
removed the code that stores dropped columns, which may not be needed as the early collapse is now done after analysis

---
 .../analysis/ColumnResolutionHelper.scala | 25 ++++---------------
 .../catalyst/plans/logical/LogicalPlan.scala | 8 +-----
 .../analysis/EarlyCollapseProject.scala | 14 ----------
 .../internal/BaseSessionStateBuilder.scala | 2 --
 4 files changed, 6 insertions(+), 43 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
index 0c2d08371dad..a90c61565039 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
@@ -59,39 +59,24 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
 val (newExprs, newChild) = {
 // Resolving expressions against current plan.
 val maybeResolvedExprs = exprs.map(resolveExpressionByPlanOutput(_, u))
-
 // Recursively resolving expressions on the child of current plan.
 resolveExprsAndAddMissingAttrs(maybeResolvedExprs, u.child)
 }
 // If some attributes used by expressions are resolvable only on the rewritten child
 // plan, we need to add them into original projection.
- val (missingAttrsFromOutput, missingAttrsFromDroppedAttr) = {
- val missing1 = (AttributeSet(newExprs) -- u.outputSet)
- val fulfilledFromOutput = missing1.intersect(newChild.outputSet)
- val missing2 = missing1 -- fulfilledFromOutput
- val fulfilledFromDroppedCol = missing2.intersect(u.getTagValue(
- LogicalPlan.DROPPED_NAMED_EXPRESSIONS).
- map(sq => AttributeSet(sq.map(_.toAttribute))).getOrElse(AttributeSet.empty))
- fulfilledFromOutput -> fulfilledFromDroppedCol
- }
+ lazy val missingAttrs =
+ (AttributeSet(newExprs) -- u.outputSet).intersect(newChild.outputSet)
 u match {
 case p: Project =>
- val droppedNamedExprs = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS).
- getOrElse(Seq.empty)
- val newProject = Project(p.projectList ++ missingAttrsFromOutput ++
- missingAttrsFromDroppedAttr.map(attr =>
- droppedNamedExprs.find(_.toAttribute.canonicalized == attr.canonicalized).get),
- newChild)
+ val newProject = Project(p.projectList ++ missingAttrs, newChild)
 newProject.copyTagsFrom(p)
 (newExprs, newProject)
 case a @ Aggregate(groupExprs, aggExprs, child) =>
- if (missingAttrsFromOutput.forall(attr =>
- groupExprs.exists(_.semanticEquals(attr)))) {
+ if (missingAttrs.forall(attr => groupExprs.exists(_.semanticEquals(attr)))) {
 // All the missing attributes are grouping expressions, valid case.
 (newExprs,
- a.copy(aggregateExpressions = aggExprs ++ missingAttrsFromOutput,
- child = newChild))
+ a.copy(aggregateExpressions = aggExprs ++ missingAttrs, child = newChild))
 } else {
 // Need to add non-grouping attributes, invalid case.
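 // leave both the expressions and the Aggregate unchanged here, so that the
 // unresolved attributes surface as the usual analysis error in the caller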
(exprs, a) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 23dbc5e2440d..cce385e8d9d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -132,9 +132,6 @@ abstract class LogicalPlan private[this] lazy val outputAttributes = AttributeSeq.fromNormalOutput(output) - private[this] lazy val droppedAttributes = this.getTagValue( - LogicalPlan.DROPPED_NAMED_EXPRESSIONS).map(_.map(_.toAttribute)).getOrElse(Seq.empty) - private[this] lazy val outputMetadataAttributes = AttributeSeq(metadataOutput) /** @@ -157,8 +154,7 @@ abstract class LogicalPlan nameParts: Seq[String], resolver: Resolver): Option[NamedExpression] = outputAttributes.resolve(nameParts, resolver) - .orElse(outputMetadataAttributes.resolve(nameParts, resolver)).orElse( - droppedAttributes.resolve(nameParts, resolver)) + .orElse(outputMetadataAttributes.resolve(nameParts, resolver)) /** * Given an attribute name, split it to name parts by dot, but @@ -202,8 +198,6 @@ object LogicalPlan { // to the old code path. private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id") private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col") - private[spark] val DROPPED_NAMED_EXPRESSIONS = - TreeNodeTag[Seq[NamedExpression]]("dropped_namedexprs") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala index 3d6d348fd22d..dc94603d2a3d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala @@ -169,26 +169,12 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { remappedNewProjListResult match { case Success(remappedNewProjList) => val newProject = Project(remappedNewProjList, child) - val droppedNamedExprs = projList.filter(ne => - remappedNewProjList.forall(_.toAttribute != ne.toAttribute)) - val prevDroppedColsPart1 = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). 
- getOrElse(Seq.empty)
- // remove any attribs which have been added back in the new project list
- val prevDroppedColsPart2 = prevDroppedColsPart1.filterNot(x =>
- remappedNewProjList.exists(y => y.toAttribute == x.toAttribute || y.name == x.name))
- val prevDroppedColsFinal = prevDroppedColsPart2.filterNot(x =>
- droppedNamedExprs.exists(y => y == x || y.name == x.name))
- val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal
 newProject.copyTagsFrom(p)
 // remove the datasetId copied from current P due to above copy
 newProject.unsetTagValue(Dataset.DATASET_ID_TAG)
 // use the dataset id of the incoming new project
 newP.getTagValue(Dataset.DATASET_ID_TAG).foreach(map =>
 newProject.setTagValue(Dataset.DATASET_ID_TAG, map.clone()))
- newProject.unsetTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS)
- if (newDroppedList.nonEmpty) {
- newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList)
- }
 Option(newProject)
 case Failure(x) =>
 if (Utils.isTesting) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
index dd8c6678df12..fb586fc984a4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
@@ -227,8 +227,6 @@ abstract class BaseSessionStateBuilder(
 TableCapabilityCheck +:
 CommandCheck +:
 customCheckRules
-
-
 }

 /**

From 5fea69d75e3e9a2202d421d5a1ab2982fdd385a5 Mon Sep 17 00:00:00 2001
From: ashahid
Date: Tue, 2 Jan 2024 11:10:38 -0800
Subject: [PATCH 113/129] SPARK-45959. reverted the removal of the code storing dropped columns as it's needed for column resolution

---
 .../analysis/ColumnResolutionHelper.scala | 27 ++++++++++++++-----
 .../catalyst/plans/logical/LogicalPlan.scala | 8 +++++-
 .../analysis/EarlyCollapseProject.scala | 14 ++++++++++
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
index a90c61565039..cc10bada2bec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
@@ -59,24 +59,39 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
 val (newExprs, newChild) = {
 // Resolving expressions against current plan.
 val maybeResolvedExprs = exprs.map(resolveExpressionByPlanOutput(_, u))
+
 // Recursively resolving expressions on the child of current plan.
 resolveExprsAndAddMissingAttrs(maybeResolvedExprs, u.child)
 }
 // If some attributes used by expressions are resolvable only on the rewritten child
 // plan, we need to add them into original projection.
- lazy val missingAttrs =
- (AttributeSet(newExprs) -- u.outputSet).intersect(newChild.outputSet)
+ val (missingAttrsFromOutput, missingAttrsFromDroppedAttr) = {
+ val missing1 = AttributeSet(newExprs) -- u.outputSet
+ val fulfilledFromOutput = missing1.intersect(newChild.outputSet)
+ val missing2 = missing1 -- fulfilledFromOutput
+ val fulfilledFromDroppedCol = missing2.intersect(u.getTagValue(
+ LogicalPlan.DROPPED_NAMED_EXPRESSIONS).
+ map(sq => AttributeSet(sq.map(_.toAttribute))).getOrElse(AttributeSet.empty)) + fulfilledFromOutput -> fulfilledFromDroppedCol + } u match { case p: Project => - val newProject = Project(p.projectList ++ missingAttrs, newChild) + val droppedNamedExprs = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). + getOrElse(Seq.empty) + val newProject = Project(p.projectList ++ missingAttrsFromOutput ++ + missingAttrsFromDroppedAttr.map(attr => + droppedNamedExprs.find(_.toAttribute.canonicalized == attr.canonicalized).get), + newChild) newProject.copyTagsFrom(p) (newExprs, newProject) - case a @ Aggregate(groupExprs, aggExprs, child) => - if (missingAttrs.forall(attr => groupExprs.exists(_.semanticEquals(attr)))) { + case a @ Aggregate(groupExprs, aggExprs, _) => + if (missingAttrsFromOutput.forall(attr => + groupExprs.exists(_.semanticEquals(attr)))) { // All the missing attributes are grouping expressions, valid case. (newExprs, - a.copy(aggregateExpressions = aggExprs ++ missingAttrs, child = newChild)) + a.copy(aggregateExpressions = aggExprs ++ missingAttrsFromOutput, + child = newChild)) } else { // Need to add non-grouping attributes, invalid case. (exprs, a) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index e1121d1f9026..31a614f905ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -133,6 +133,9 @@ abstract class LogicalPlan private[this] lazy val outputAttributes = AttributeSeq.fromNormalOutput(output) + private[this] lazy val droppedAttributes = this.getTagValue( + LogicalPlan.DROPPED_NAMED_EXPRESSIONS).map(_.map(_.toAttribute)).getOrElse(Seq.empty) + private[this] lazy val outputMetadataAttributes = AttributeSeq(metadataOutput) /** @@ -155,7 +158,8 @@ abstract class LogicalPlan nameParts: Seq[String], resolver: Resolver): Option[NamedExpression] = outputAttributes.resolve(nameParts, resolver) - .orElse(outputMetadataAttributes.resolve(nameParts, resolver)) + .orElse(outputMetadataAttributes.resolve(nameParts, resolver)).orElse( + droppedAttributes.resolve(nameParts, resolver)) /** * Given an attribute name, split it to name parts by dot, but @@ -199,6 +203,8 @@ object LogicalPlan { // to the old code path. private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id") private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col") + private[spark] val DROPPED_NAMED_EXPRESSIONS = + TreeNodeTag[Seq[NamedExpression]]("dropped_namedexprs") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala index dc94603d2a3d..3d6d348fd22d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala @@ -169,12 +169,26 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { remappedNewProjListResult match { case Success(remappedNewProjList) => val newProject = Project(remappedNewProjList, child) + val droppedNamedExprs = projList.filter(ne => + remappedNewProjList.forall(_.toAttribute != ne.toAttribute)) + val prevDroppedColsPart1 = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). 
+ getOrElse(Seq.empty)
+ // remove any attribs which have been added back in the new project list
+ val prevDroppedColsPart2 = prevDroppedColsPart1.filterNot(x =>
+ remappedNewProjList.exists(y => y.toAttribute == x.toAttribute || y.name == x.name))
+ val prevDroppedColsFinal = prevDroppedColsPart2.filterNot(x =>
+ droppedNamedExprs.exists(y => y == x || y.name == x.name))
+ val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal
 newProject.copyTagsFrom(p)
 // remove the datasetId copied from current P due to above copy
 newProject.unsetTagValue(Dataset.DATASET_ID_TAG)
 // use the dataset id of the incoming new project
 newP.getTagValue(Dataset.DATASET_ID_TAG).foreach(map =>
 newProject.setTagValue(Dataset.DATASET_ID_TAG, map.clone()))
+ newProject.unsetTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS)
+ if (newDroppedList.nonEmpty) {
+ newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList)
+ }
 Option(newProject)
 case Failure(x) =>
 if (Utils.isTesting) {

From dc104f9221d0398385a8f2a29f643c5bb68a9fc6 Mon Sep 17 00:00:00 2001
From: ashahid
Date: Tue, 2 Jan 2024 14:21:28 -0800
Subject: [PATCH 114/129] SPARK-45959. refactored tests

---
 .../sql/catalyst/analysis/Analyzer.scala | 6 +-
 .../apache/spark/sql/internal/SQLConf.scala | 10 ++
 .../spark/sql/EarlyCollapseProjectSuite.scala | 115 ++++++++++--------
 3 files changed, 77 insertions(+), 54 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 71cc6e6e6fef..d49edde6fe1f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -59,6 +59,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DayTimeIntervalType.DAY
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.util.ArrayImplicits._
+import org.apache.spark.util.Utils

 /**
  * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and
@@ -212,7 +213,10 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
 AnalysisHelper.markInAnalyzer {
 val analyzed = executeAndTrack(plan, tracker)
 checkAnalysis(analyzed)
- postAnalysisEarlyOptimizationRules.foldLeft(analyzed) {
+ val excludedPostAnalysisRulesConf =
+ conf.postAnalysisExcludesRules.toSeq.flatMap(Utils.stringToSeq)
+ postAnalysisEarlyOptimizationRules.filterNot(
+ rule => excludedPostAnalysisRulesConf.contains(rule.ruleName)).foldLeft(analyzed) {
 case(rs, rule) => rule(rs)
 }
 }

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index d54cb3756638..176f1a1abb36 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -238,6 +238,14 @@ object SQLConf {
 }
 }

+ val EXCLUDE_POST_ANALYSIS_RULES =
+ buildConf("spark.sql.analyzer.excludePostAnalysisRules")
+ .internal()
+ .doc("Comma-separated names of the post-analysis rules to be excluded.")
+ .version("3.5.0")
+ .stringConf
+ .createOptional
+
 val ANALYZER_MAX_ITERATIONS = buildConf("spark.sql.analyzer.maxIterations")
 .internal()
 .doc("The max number of iterations the analyzer runs.")
@@ -4764,6 +4772,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {

 def analyzerMaxIterations: Int =
getConf(ANALYZER_MAX_ITERATIONS) + def postAnalysisExcludesRules: Option[String] = getConf(EXCLUDE_POST_ANALYSIS_RULES) + def optimizerExcludedRules: Option[String] = getConf(OPTIMIZER_EXCLUDED_RULES) def optimizerMaxIterations: Int = getConf(OPTIMIZER_MAX_ITERATIONS) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 953cae878e22..d299daa4e681 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -19,8 +19,10 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.analysis.EarlyCollapseProject import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -29,174 +31,175 @@ class EarlyCollapseProjectSuite extends QueryTest import testImplicits._ val useCaching: Boolean = false test("withColumns: check no new project addition for simple columns addition") { - val baseDf = spark.range(20).select($"id" as "a", $"id" as "b") - checkProjectCollapseAndCacheUse(baseDf, + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b") + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2))) } test("withColumns: check no new project addition if redefined alias is not used in" + " new columns") { - val baseDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", - $"b") + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "a", $"b") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2))) } test("withColumns: no new project addition if redefined alias is used in new columns - 1") { - val baseDf = spark.range(20).select($"id" as "a", $"id" as "b").select($"a" + 1 as "a", - $"b") + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "a", $"b") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2))) } test("withColumns: no new project addition if redefined alias is used in new columns - 2") { - val baseDf = spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). 
select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is not used in other cols") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "c")) + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b") + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("a", "c")) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is an attribute used in other cols") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("a", "d")) + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("a", "d")) } test("withColumnRenamed: remap of column should not result in new project if the remap" + " is on an alias") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x")) + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("d", "x")) } test("withColumnRenamed: remap of column should not result in new project if the remap" + " source an alias and that attribute is also projected as another attribute") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). select($"c", $"a", $"b", $"d", $"d" as "k") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnRenamed("d", "x")) + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("d", "x")) } test("withColumnRenamed: test multi column remap") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u"))) } test("withColumns: test multi column addition") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) } test("mix of column addition, rename and dropping") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", $"a" as "renameCola", $"c" * $"d" as "c", $"a")) } test("reuse of cache on mix of column addition, rename and dropping - 1") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"c" * $"d" as "c", $"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", $"a" as "renameCola", $"a")) } test("reuse of cache on mix of column addition, rename and dropping - 2") { - val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA")) } test("reuse of cache on mix of column addition, rename and dropping - 3") { - val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d" * $"a" as "d", $"b" as "renameB", $"a" * $"d" as "renameA", $"a" as "renameColA")) } test("reuse of cache on mix of column addition, rename and dropping - 4") { - val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDf, df => df.select($"c")) + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"c")) } test("reuse of cache on mix of column addition, rename and dropping - 5") { - val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDf, df => df.select($"d" * 7 as "a")) + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d" * 7 as "a")) } test("reuse of cache on mix of column addition, rename and dropping - 6") { - val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDf, df => df.select($"d" * 7 as "a", $"d" * 7 as "b", + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d" * 7 as "a", $"d" * 7 as "b", $"b" + $"a" as "e")) } test("reuse of cache on mix of column addition, rename and dropping - 7") { - val baseDf = spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). 
select($"a" + $"b" as "c", $"a", $"b").select( lit(9) as "e", $"c", lit(11) as "a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDf, df => df.select($"a" as "a1", lit(7) as "d1", + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"a" as "a1", lit(7) as "d1", $"b" as "b1", $"c" * $"a" as "c", lit(13) as "f")) } test("use of cached InMemoryRelation when new columns added do not result in new project -1") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumns( + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) } test("use of cached InMemoryRelation when new columns added do not result in new project -2") { - val baseDf = spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) } test("use of cached InMemoryRelation when new columns added do not result in new project, with" + "positions changed") { - val baseDf = spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - checkProjectCollapseAndCacheUse(baseDf, + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b")) } test("use of cached InMemoryRelation when renamed columns do not result in new project") { - val baseDf = spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDf, df => df.withColumnsRenamed( + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed( Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1"))) } private def checkProjectCollapseAndCacheUse( - baseDf: DataFrame, + baseDfCreator: () => DataFrame, testExec: DataFrame => DataFrame): Unit = { + val baseDf = baseDfCreator() if (useCaching) { baseDf.cache() } @@ -211,6 +214,15 @@ class EarlyCollapseProjectSuite extends QueryTest assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. 
isInstanceOf[InMemoryRelation]) } + // now check if the results of optimized dataframe and completely unoptimized dataframe are same + val fullyUnopt = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + testExec(baseDfCreator()) + } + + assert(collectNodes(fullyUnopt).size >= nonOptDfNodes.size) + checkAnswer(newDfOpt, fullyUnopt) + } private def getComparableDataFrames( @@ -219,13 +231,10 @@ class EarlyCollapseProjectSuite extends QueryTest // first obtain optimized transformation which avoids adding new project val newDfOpt = transformation(baseDf) // then obtain optimized transformation which adds new project - val logicalPlan = baseDf.logicalPlan - val newDfUnopt = try { - // add a plan id tag which will cause skipping of EarlyCollapseProject rule - logicalPlan.setTagValue[Long](LogicalPlan.PLAN_ID_TAG, 100L) + + val newDfUnopt = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { transformation(baseDf) - } finally { - logicalPlan.unsetTagValue(LogicalPlan.PLAN_ID_TAG) } (newDfOpt, newDfUnopt) } From 66f5c8ceb2ca2c130b1403e09baa52898d7a6196 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 2 Jan 2024 15:25:00 -0800 Subject: [PATCH 115/129] SPARK-45959. refactored tests --- .../spark/sql/EarlyCollapseProjectSuite.scala | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index d299daa4e681..3eb325b5a071 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.analysis.EarlyCollapseProject import org.apache.spark.sql.execution.columnar.InMemoryRelation @@ -113,7 +113,7 @@ class EarlyCollapseProjectSuite extends QueryTest $"a" as "renameCola", $"c" * $"d" as "c", $"a")) } - test("reuse of cache on mix of column addition, rename and dropping - 1") { + test("mix of column addition, rename and dropping - 1") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") checkProjectCollapseAndCacheUse(baseDfCreator, @@ -121,14 +121,14 @@ class EarlyCollapseProjectSuite extends QueryTest $"a" as "renameCola", $"a")) } - test("reuse of cache on mix of column addition, rename and dropping - 2") { + test("mix of column addition, rename and dropping - 2") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA")) } - test("reuse of cache on mix of column addition, rename and dropping - 3") { + test("mix of column addition, rename and dropping - 3") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). 
select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") checkProjectCollapseAndCacheUse(baseDfCreator, @@ -136,26 +136,26 @@ class EarlyCollapseProjectSuite extends QueryTest $"a" as "renameColA")) } - test("reuse of cache on mix of column addition, rename and dropping - 4") { + test("mix of column addition, rename and dropping - 4") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"c")) } - test("reuse of cache on mix of column addition, rename and dropping - 5") { + test("mix of column addition, rename and dropping - 5") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d" * 7 as "a")) } - test("reuse of cache on mix of column addition, rename and dropping - 6") { + test("mix of column addition, rename and dropping - 6") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d" * 7 as "a", $"d" * 7 as "b", $"b" + $"a" as "e")) } - test("reuse of cache on mix of column addition, rename and dropping - 7") { + test("mix of column addition, rename and dropping - 7") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select( lit(9) as "e", $"c", lit(11) as "a", $"b", $"c" * $"a" * $"b" as "d") @@ -163,7 +163,7 @@ class EarlyCollapseProjectSuite extends QueryTest $"b" as "b1", $"c" * $"a" as "c", lit(13) as "f")) } - test("use of cached InMemoryRelation when new columns added do not result in new project -1") { + test("new columns added do not result in new project -1") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns( @@ -171,7 +171,7 @@ class EarlyCollapseProjectSuite extends QueryTest Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) } - test("use of cached InMemoryRelation when new columns added do not result in new project -2") { + test("new columns added do not result in new project -2") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") @@ -179,8 +179,7 @@ class EarlyCollapseProjectSuite extends QueryTest df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) } - test("use of cached InMemoryRelation when new columns added do not result in new project, with" + - "positions changed") { + test("new columns added do not result in new project, with positions changed") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). 
select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")
@@ -188,7 +187,7 @@ class EarlyCollapseProjectSuite extends QueryTest
 df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b"))
 }

- test("use of cached InMemoryRelation when renamed columns do not result in new project") {
+ test("renamed columns do not result in new project") {
 val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b").
 select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d")

 checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed(
 Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1")))
 }

+ test("resurrection of intermediate dropped cols when used in filter") {
+ val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b").
+ select($"a" + 1 as "c", $"b").select($"c", $"b", $"c" + 7 as "d")
+ // A dropped column would result in a new Project being added on top of the Filter,
+ // so we have to take that extra Project into account while checking the assertion
+ // on the initial node count versus the optimized df's node count
+ checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed(
+ Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).filter($"a" > 5))
+ }
+
+ test("resurrection of right renamed intermediate dropped cols when used in filter") {
+ val baseDfCreator = () => spark.range(10).select($"id" + 7 as "a", $"id" as "b").
+ select($"a" + 1 as "c", $"b", $"a" * $"b" as "a").select($"c", $"b", $"c" + 7 as "d")
+ // A dropped column would result in a new Project being added on top of the Filter,
+ // so we have to take that extra Project into account while checking the assertion
+ // on the initial node count versus the optimized df's node count
+ checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed(
+ Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).select($"c1", $"d1").filter($"a" > 25))
+ }
+
 private def checkProjectCollapseAndCacheUse(
 baseDfCreator: () => DataFrame,
 testExec: DataFrame => DataFrame): Unit = {
@@ -207,8 +226,11 @@ class EarlyCollapseProjectSuite extends QueryTest
 val (newDfOpt, newDfUnopt) = getComparableDataFrames(baseDf, testExec)
 val optDfNodes = collectNodes(newDfOpt)
 val nonOptDfNodes = collectNodes(newDfUnopt)
- assert(initNodes.size === optDfNodes.size)
- assert(nonOptDfNodes.size === optDfNodes.size + 1)
+ val foundFilterNodes = optDfNodes.exists(_.isInstanceOf[Filter])
+ if (!foundFilterNodes) {
+ assert(initNodes.size === optDfNodes.size)
+ }
+ assert(nonOptDfNodes.size > optDfNodes.size)
 checkAnswer(newDfOpt, newDfUnopt)
 if (useCaching) {
 assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head.
 isInstanceOf[InMemoryRelation])

From cf859c6ca7b5df956225ef880801743bef08b0aa Mon Sep 17 00:00:00 2001
From: ashahid
Date: Wed, 3 Jan 2024 13:13:25 -0800
Subject: [PATCH 116/129] SPARK-45959.
rectified the golden files --- .../column-resolution-aggregate.sql.out | 15 +- .../analyzer-results/natural-join.sql.out | 436 +++++----- .../sql-tests/analyzer-results/pivot.sql.out | 536 ++++++------ .../udf/udf-natural-join.sql.out | 31 +- .../analyzer-results/udf/udf-pivot.sql.out | 536 ++++++------ .../analyzer-results/using-join.sql.out | 793 +++++++++--------- 6 files changed, 1133 insertions(+), 1214 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out index 39ab8aa835c4..e7c304fe10b0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out @@ -114,14 +114,13 @@ org.apache.spark.sql.AnalysisException -- !query SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY lca -- !query analysis -Project [lca#x, (lca#x + 1) AS col#x] -+- Project [k#x, k#x AS lca#x] - +- Aggregate [k#x], [k#x] - +- SubqueryAlias v1 - +- View (`v1`, [a#x, b#x, k#x]) - +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] - +- SubqueryAlias t - +- LocalRelation [a#x, b#x, k#x] +Project [k#x AS lca#x, (k#x + 1) AS col#x] ++- Aggregate [k#x], [k#x] + +- SubqueryAlias v1 + +- View (`v1`, [a#x, b#x, k#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] + +- SubqueryAlias t + +- LocalRelation [a#x, b#x, k#x] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out index f8da5c519635..8fab5e05dac7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out @@ -71,6 +71,25 @@ CreateViewCommand `nt4`, select * from values SELECT * FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] + + +-- !query +SELECT * FROM nt1 natural join nt2 where k = "one" +-- !query analysis +Filter (k#x = one) +- Project [k#x, v1#x, v2#x] +- Join Inner, (k#x = k#x) :- SubqueryAlias nt1 @@ -87,27 +106,6 @@ Project [k#x, v1#x, v2#x] +- LocalRelation [k#x, v2#x] --- !query -SELECT * FROM nt1 natural join nt2 where k = "one" --- !query analysis -Project [k#x, v1#x, v2#x] -+- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] - - -- !query SELECT * FROM nt1 natural left join nt2 order by v1, v2 -- !query analysis @@ -174,7 +172,26 @@ Aggregate [count(1) AS count(1)#xL] 
SELECT k FROM nt1 natural join nt2 -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] + + +-- !query +SELECT k FROM nt1 natural join nt2 where k = "one" +-- !query analysis +Filter (k#x = one) ++- Project [k#x] +- Join Inner, (k#x = k#x) :- SubqueryAlias nt1 : +- View (`nt1`, [k#x, v1#x]) @@ -190,65 +207,42 @@ Project [k#x] +- LocalRelation [k#x, v2#x] --- !query -SELECT k FROM nt1 natural join nt2 where k = "one" --- !query analysis -Project [k#x] -+- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] - - -- !query SELECT nt1.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt2.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -299,60 +293,57 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, k#x, 
v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT *, nt2.k FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 natural join nt2 -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -401,22 +392,21 @@ Sort [key#x ASC NULLS FIRST], true -- !query SELECT nt1.k, nt2.k FROM nt1 natural join nt2 where k = "one" -- !query analysis -Project [k#x, k#x] -+- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) 
AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Filter (k#x = one) ++- Project [k#x, k#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -510,56 +500,54 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException SELECT * FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, v2#x, v3#x] -+- Project [k#x, v1#x, v2#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x, v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x, v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x, v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x, v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x, v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x, v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.* FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x, v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x, v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x, v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x, v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x, 
v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x, v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query @@ -620,61 +608,59 @@ Project [k#x, v1#x, v2#x, k#x, v3#x] SELECT * FROM nt1 natural join nt2 join nt3 on nt2.k = nt3.k -- !query analysis Project [k#x, v1#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, v2#x, k#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x, v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x, v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x, v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x, v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x, v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x, v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.*, nt4.* FROM nt1 natural join nt2 natural join nt3 natural join nt4 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x, k#x, v4#x] -+- Project [k#x, v1#x, v2#x, v3#x, v4#x, k#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- Project [k#x, v1#x, v2#x, k#x] - : : +- Join Inner, (k#x = k#x) - : : :- SubqueryAlias nt1 - : : : +- View (`nt1`, [k#x, v1#x]) - : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : : +- Project [k#x, v1#x] - : : : +- SubqueryAlias nt1 - : : : +- LocalRelation [k#x, v1#x] - : : +- SubqueryAlias nt2 - : : +- View (`nt2`, [k#x, v2#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : : +- Project [k#x, v2#x] - : : +- SubqueryAlias nt2 - : : +- LocalRelation [k#x, v2#x] - : +- SubqueryAlias nt3 - : +- View (`nt3`, [k#x, v3#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - : +- Project [k#x, v3#x] - : +- SubqueryAlias nt3 - : +- LocalRelation [k#x, v3#x] - +- SubqueryAlias nt4 - +- View (`nt4`, [k#x, v4#x]) - +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] - +- Project [k#x, v4#x] - +- SubqueryAlias nt4 - +- LocalRelation [k#x, v4#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- Project [k#x, v1#x, v2#x, k#x] + : : +- Join Inner, (k#x = k#x) + : : :- SubqueryAlias 
nt1 + : : : +- View (`nt1`, [k#x, v1#x]) + : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : : +- Project [k#x, v1#x] + : : : +- SubqueryAlias nt1 + : : : +- LocalRelation [k#x, v1#x] + : : +- SubqueryAlias nt2 + : : +- View (`nt2`, [k#x, v2#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : : +- Project [k#x, v2#x] + : : +- SubqueryAlias nt2 + : : +- LocalRelation [k#x, v2#x] + : +- SubqueryAlias nt3 + : +- View (`nt3`, [k#x, v3#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + : +- Project [k#x, v3#x] + : +- SubqueryAlias nt3 + : +- LocalRelation [k#x, v3#x] + +- SubqueryAlias nt4 + +- View (`nt4`, [k#x, v4#x]) + +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] + +- Project [k#x, v4#x] + +- SubqueryAlias nt4 + +- LocalRelation [k#x, v4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index c0a02dc29d60..b112a2c80c25 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, 
__pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_avg(earnings)#x, Java_sum(earnings)#xL, Java_avg(earnings)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, 
__pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -124,18 +121,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, 
earnings#x] -- !query @@ -147,18 +143,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_sum(earnings)#xL, dotNET_min(year)#x, Java_sum(earnings)#xL, Java_min(year)#x] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] - +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -172,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] ++- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -204,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_min(s)#x, Java_sum(earnings)#xL, Java_min(s)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS 
`min(__auto_generated_subquery_name.s)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -236,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * 
__auto_generated_subquery_name.s))`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -266,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS 
sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -289,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, 
cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -373,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_CEIL(sum(earnings))#xL, dotNET_a1#x, Java_CEIL(sum(earnings))#xL, Java_a1#x] -+- Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) 
- +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -421,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View 
(`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -453,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS 
`sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -610,25 +597,24 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, [1, 1]#xL, [2, 2]#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] - +- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] ++- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias 
yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]


-- !query
@@ -642,25 +628,24 @@ PIVOT (
  FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2)))
)
-- !query analysis
-Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL]
-+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL]
- +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x]
- +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL]
- +- SubqueryAlias __auto_generated_subquery_name
- +- Project [course#x, earnings#x, year#x, a#x]
- +- Join Inner, (year#x = y#x)
- :- SubqueryAlias coursesales
- : +- View (`courseSales`, [course#x, year#x, earnings#x])
- : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
- : +- Project [course#x, year#x, earnings#x]
- : +- SubqueryAlias courseSales
- : +- LocalRelation [course#x, year#x, earnings#x]
- +- SubqueryAlias yearswithcomplextypes
- +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
- +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
- +- Project [y#x, a#x, m#x, s#x]
- +- SubqueryAlias yearsWithComplexTypes
- +- LocalRelation [y#x, a#x, m#x, s#x]
+Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL]
++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x]
+ +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [course#x, earnings#x, year#x, a#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]


-- !query
@@ -674,25 +659,24 @@ PIVOT (
  FOR s IN ((1, 'a'), (2, 'b'))
)
-- !query analysis
-Project [year#x, {1, a}#xL, {2, b}#xL]
-+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL]
- +- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x]
- +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL]
- +- SubqueryAlias __auto_generated_subquery_name
- +- Project [earnings#x, year#x, s#x]
- +- Join Inner, (year#x = y#x)
- :- SubqueryAlias coursesales
- : +- View (`courseSales`, [course#x, year#x, earnings#x])
- : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
- : +- Project [course#x, year#x, earnings#x]
- : +- SubqueryAlias courseSales
- : +- LocalRelation [course#x, year#x, earnings#x]
- +- SubqueryAlias yearswithcomplextypes
- +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
- +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
- +- Project [y#x, a#x, m#x, s#x]
- +- SubqueryAlias yearsWithComplexTypes
- +- LocalRelation [y#x, a#x, m#x, s#x]
+Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL]
++- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x]
+ +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [earnings#x, year#x, s#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]


-- !query
@@ -706,25 +690,24 @@ PIVOT (
  FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b')))
)
-- !query analysis
-Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL]
-+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL]
- +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x]
- +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL]
- +- SubqueryAlias __auto_generated_subquery_name
- +- Project [course#x, earnings#x, year#x, s#x]
- +- Join Inner, (year#x = y#x)
- :- SubqueryAlias coursesales
- : +- View (`courseSales`, [course#x, year#x, earnings#x])
- : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
- : +- Project [course#x, year#x, earnings#x]
- : +- SubqueryAlias courseSales
- : +- LocalRelation [course#x, year#x, earnings#x]
- +- SubqueryAlias yearswithcomplextypes
- +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
- +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
- +- Project [y#x, a#x, m#x, s#x]
- +- SubqueryAlias yearsWithComplexTypes
- +- LocalRelation [y#x, a#x, m#x, s#x]
+Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL]
++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x]
+ +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [course#x, earnings#x, year#x, s#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]


-- !query
@@ -779,15 +762,14 @@ PIVOT (
  FOR Course IN ('dotNET', 'Java')
)
-- !query analysis
-Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL]
-+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL]
- +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x]
- +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, 
w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out index 324622e615da..d80507eb5d8d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out @@ -36,22 +36,21 @@ CreateViewCommand `nt2`, select * from values -- !query SELECT * FROM nt1 natural join nt2 where udf(k) = "one" -- !query analysis -Project [k#x, v1#x, v2#x] -+- Filter (cast(udf(cast(k#x as string)) as string) = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Filter (cast(udf(cast(k#x as string)) as string) = one) ++- Project [k#x, v1#x, v2#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index e92b7003d6e5..56d2e13faa0c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [udf(year)#x, dotNET#xL, Java#xL] -+- Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS 
BIGINT)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(avg(earnings))#x, Java_udf(sum(earnings))#xL, Java_udf(avg(earnings))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, 
year#x, earnings#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -124,18 +121,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as 
string)) AS BIGINT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -147,18 +143,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_udf(sum(udf(earnings)))#xL, dotNET_udf(min(year))#x, Java_udf(sum(udf(earnings)))#xL, Java_udf(min(year))#x] -+- Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS 
INT)`#x[1] AS Java_udf(min(year))#x] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -172,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] ++- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS 
CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -204,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(min(s))#x, Java_udf(sum(earnings))#xL, Java_udf(min(s))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, 
CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -236,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as 
string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -266,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, 
y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -289,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, 
y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -373,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(CEIL(udf(sum(earnings))))#xL, dotNET_a1#x, Java_udf(CEIL(udf(sum(earnings))))#xL, Java_a1#x] -+- Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 
AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -421,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS 
y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -453,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, 
CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
+ +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [course#x, year#x, earnings#x, s#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias years
+ +- View (`years`, [y#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x]
+ +- Project [y#x, s#x]
+ +- SubqueryAlias years
+ +- LocalRelation [y#x, s#x]


-- !query
@@ -550,25 +537,24 @@ PIVOT (
  FOR a IN (array(1, 1), array(2, 2))
)
-- !query analysis
-Project [year#x, [1, 1]#xL, [2, 2]#xL]
-+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL]
- +- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
- +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
- +- SubqueryAlias __auto_generated_subquery_name
- +- Project [earnings#x, year#x, a#x]
- +- Join Inner, (year#x = y#x)
- :- SubqueryAlias coursesales
- : +- View (`courseSales`, [course#x, year#x, earnings#x])
- : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
- : +- Project [course#x, year#x, earnings#x]
- : +- SubqueryAlias courseSales
- : +- LocalRelation [course#x, year#x, earnings#x]
- +- SubqueryAlias yearswithcomplextypes
- +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
- +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
- +- Project [y#x, a#x, m#x, s#x]
- +- SubqueryAlias yearsWithComplexTypes
- +- LocalRelation [y#x, a#x, m#x, s#x]
+Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL]
++- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
+ +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [earnings#x, year#x, a#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]


-- !query
@@ -582,25 +568,24 @@ PIVOT (
  FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2)))
)
-- !query analysis
-Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL]
-+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL]
- +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
- +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
- +- SubqueryAlias __auto_generated_subquery_name
- +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x]
- +- Join Inner, (year#x = y#x)
- :- SubqueryAlias coursesales
- : +- View (`courseSales`, [course#x, year#x, earnings#x])
- : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
- : +- Project [course#x, year#x, earnings#x]
- : +- SubqueryAlias courseSales
- : +- LocalRelation [course#x, year#x, earnings#x]
- +- SubqueryAlias yearswithcomplextypes
- +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
- +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
- +- Project [y#x, a#x, m#x, s#x]
- +- SubqueryAlias yearsWithComplexTypes
- +- LocalRelation [y#x, a#x, m#x, s#x]
+Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL]
++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
+ +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]


-- !query
@@ -614,25 +599,24 @@ PIVOT (
  FOR s IN ((1, 'a'), (2, 'b'))
)
-- !query analysis
-Project [year#x, {1, a}#xL, {2, b}#xL]
-+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL]
- +- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
- +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
- +- SubqueryAlias __auto_generated_subquery_name
- +- Project [earnings#x, year#x, s#x]
- +- Join Inner, (year#x = y#x)
- :- SubqueryAlias coursesales
- : +- View (`courseSales`, [course#x, year#x, earnings#x])
- : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
- : +- Project [course#x, year#x, earnings#x]
- : +- SubqueryAlias courseSales
- : +- LocalRelation [course#x, year#x, earnings#x]
- +- SubqueryAlias yearswithcomplextypes
- +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
- +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
- +- Project [y#x, a#x, m#x, s#x]
- +- SubqueryAlias yearsWithComplexTypes
- +- LocalRelation [y#x, a#x, m#x, s#x]
+Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL]
++- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
+ +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [earnings#x, year#x, s#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ +- Project [y#x, a#x, m#x, s#x]
+ +- SubqueryAlias yearsWithComplexTypes
+ +- LocalRelation [y#x, a#x, m#x, s#x]


-- !query
@@ -646,25 +630,24 @@ PIVOT (
  FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b')))
)
-- !query analysis
-Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL]
-+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL]
- +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
- +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
- +- SubqueryAlias __auto_generated_subquery_name
- +- Project [course#x, earnings#x, year#x, s#x]
- +- Join Inner, (year#x = y#x)
- :- SubqueryAlias coursesales
- : +- View (`courseSales`, [course#x, year#x, earnings#x])
- : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
- : +- Project [course#x, year#x, earnings#x]
- : +- SubqueryAlias courseSales
- : +- LocalRelation [course#x, year#x, earnings#x]
- +- SubqueryAlias yearswithcomplextypes
- +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
- +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
- +- Project [y#x, a#x, m#x, s#x]
- +- SubqueryAlias yearsWithComplexTypes
- +- LocalRelation [y#x, a#x, m#x, s#x]
+Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL]
++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x]
+ +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL]
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- Project [course#x, earnings#x, year#x, s#x]
+ +- Join Inner, (year#x = y#x)
+ :- SubqueryAlias coursesales
+ : +- View (`courseSales`, [course#x, year#x, earnings#x])
+ : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x]
+ : +- Project [course#x, year#x, earnings#x]
+ : +- SubqueryAlias courseSales
+ : +- LocalRelation [course#x, year#x, earnings#x]
+ +- SubqueryAlias yearswithcomplextypes
+ +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x])
+ +- Project [cast(y#x as int) AS y#x, cast(a#x as array<int>) AS a#x, cast(m#x as map<string,int>) AS m#x, cast(s#x as struct<col1:int,col2:string>) AS s#x]
+ 
+- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -720,15 +703,14 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] -+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- 
LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 3c9c6ec169af..6f0008edb408 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -39,80 +39,76 @@ CreateViewCommand `nt2`, select * from values SELECT * FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- 
!query SELECT nt1.k, nt2.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -183,220 +179,209 @@ Sort [k#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, 
[k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.* FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS 
v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- 
SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -446,120 +431,114 @@ Sort [key#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- 
SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 full outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 full outer join 
nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -608,121 +587,115 @@ Sort [key#x ASC NULLS FIRST], true -- !query SELECT k, nt1.k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as 
string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as 
string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -772,40 +745,38 @@ Sort [key#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query From 025ee9ad71bc72756016a46260fb3f7baf74294b Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 3 Jan 2024 14:12:07 -0800 Subject: [PATCH 117/129] SPARK-45959. refactored CacheManager code --- .../spark/sql/execution/CacheManager.scala | 347 ++++++++++-------- 1 file changed, 192 insertions(+), 155 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 6fd5ae295d6f..d9da49edd751 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -314,170 +314,207 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { Alias5 (k, f(attr1, attr2, al3, al4) Alias6 (p, f(attr1, attr2, al3, al4) */ - - /** Optionally returns cached data for the given [[LogicalPlan]]. 
*/ def lookupCachedData(plan: LogicalPlan): Option[CachedData] = { val fullMatch = cachedData.find(cd => plan.sameResult(cd.plan)) - fullMatch.map(Option(_)).getOrElse({ - var foundMatch = false - var partialMatch: Option[CachedData] = None - for (cd <- cachedData if !foundMatch) { - (plan, cd.plan) match { - case (incomingPlan: UnaryNode, cachedPlan: UnaryNode) => - if (incomingPlan.child.sameResult(cachedPlan.child)) { - if (incomingPlan.getClass == cachedPlan.getClass && - incomingPlan.isInstanceOf[Project]) { - val incomingProject = incomingPlan.asInstanceOf[Project] - val cdPlanProject = cachedPlan.asInstanceOf[Project] - // since the child of both incoming and cached plan are same - // that is why we are here. for mapping and comparison purposes lets - // canonicalize the cachedPlan's project list in terms of the incoming plan's child - // so that we can map correctly. - val cdPlanToIncomngPlanChildOutputMapping = - cdPlanProject.child.output.zip(incomingProject.child.output).toMap - // val canonicalizedInProj = incomingProject.canonicalized.asInstanceOf[Project] - val canonicalizedCdProjList = cdPlanProject.projectList.map(_.transformUp { - case attr: Attribute => cdPlanToIncomngPlanChildOutputMapping(attr) - }.asInstanceOf[NamedExpression]) - // matchIndexInCdPlanProj remains -1 in the end, itindicates it is - // new cols created out of existing output attribs - val (directlyMappedincomingToCachedPlanIndx, inComingProjNoDirectMapping) = - incomingProject.projectList.zipWithIndex.map { - case (inComingNE, index) => - // first check for equivalent named expressions..if index is != -1, that means - // it is pass thru Alias or pass thru - Attribute - var matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere(_ == inComingNE) - if (matchIndexInCdPlanProj == -1) { - // if match index is -1, that means it could be two possibilities: - // 1) it is a case of rename which means the incoming expr is an alias and - // its child is an attrib ref, which may have a direct attribref in the - // cdPlanProj, or it may actually have an alias whose ref matches the ref - // of incoming attribRef - // 2) the positions in the incoming project alias and the cdPlanProject are - // different. as a result the canonicalized alias of each would have - // relatively different exprIDs ( as their relative positions differ), but - // even in such cases as their child logical plans are same, so the child - // expression of each alias will have same canonicalized data - val incomingExprToCheck = inComingNE match { - case x: AttributeReference => x - case Alias(expr, _) => expr - } - matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere { - case Alias(expr, _) => expr == incomingExprToCheck - case x => x == incomingExprToCheck - } - } - index -> matchIndexInCdPlanProj - }.partition(_._2 != -1) - - // Now there is a possible case wherea literal is present in IMR as attribute - // and the incoming project also has that literal somewhere in the alias. Though - // we do not need to read it but looks like the deserializer fails if we skip that - // literal in the projection enforced on IMR. so in effect even if we do not - // require an attribute it still needs to be present in the projection forced - // also its possible that some attribute from IMR can be used in subexpression - // of the incoming projection. 
so we have to handle that - val unusedAttribsOfCDPlanToGenIncomingAttr = - cdPlanProject.projectList.indices.filterNot(i => - directlyMappedincomingToCachedPlanIndx.exists(_._2 == i)).map(i => { - val cdAttrib = cdPlanProject.projectList(i) - i -> AttributeReference(cdAttrib.name, cdAttrib.dataType, - cdAttrib.nullable, cdAttrib.metadata)(qualifier = cdAttrib.qualifier) - }) - - // Because in case of rename multiple incmong named exprs ( attribute or aliases) - // will point to a common cdplan attrib, we need to ensure they do not create - // separate attribute in the the modifiedProject for incoming plan.. - // that is a single attribute ref is present in all mixes of rename and pass thru - // attributes. - // so we will use the first attribute ref in the incoming directly mapped project - // or if no attrib exists ( only case of rename) we will pick the child expr which - // is bound to be an attribute as the common ref. - val cdAttribToCommonAttribForIncmngNe = directlyMappedincomingToCachedPlanIndx.map { - case (inAttribIndex, cdAttribIndex) => - cdPlanProject.projectList(cdAttribIndex).toAttribute -> - incomingProject.projectList(inAttribIndex) - }.groupBy(_._1).map { - case (cdAttr, incomngSeq) => - val incmngCommonAttrib = incomngSeq.map(_._2).flatMap { - case attr: Attribute => Seq(attr) - case Alias(attr: Attribute, _) => Seq(attr) - case _ => Seq.empty - }.headOption.getOrElse( - AttributeReference(cdAttr.name, cdAttr.dataType, cdAttr.nullable)()) - cdAttr -> incmngCommonAttrib - } + fullMatch.map(Option(_)).getOrElse(lookUpPartiallyMatchedCachedPlan(plan)) + } + + private def lookUpPartiallyMatchedCachedPlan(plan: LogicalPlan): Option[CachedData] = { + var foundMatch = false + var partialMatch: Option[CachedData] = None + for (cd <- cachedData if !foundMatch) { + (plan, cd.plan) match { + case (incomingPlan: UnaryNode, cachedPlan: UnaryNode) => + if (incomingPlan.child.sameResult(cachedPlan.child)) { + if (incomingPlan.getClass == cachedPlan.getClass && + incomingPlan.isInstanceOf[Project]) { + val incomingProject = incomingPlan.asInstanceOf[Project] + val cdPlanProject = cachedPlan.asInstanceOf[Project] + // since the child of both incoming and cached plan are same + // that is why we are here. for mapping and comparison purposes lets + // canonicalize the cachedPlan's project list in terms of the incoming plan's child + // so that we can map correctly. + val cdPlanToIncomngPlanChildOutputMapping = + cdPlanProject.child.output.zip(incomingProject.child.output).toMap + + val canonicalizedCdProjList = cdPlanProject.projectList.map(_.transformUp { + case attr: Attribute => cdPlanToIncomngPlanChildOutputMapping(attr) + }.asInstanceOf[NamedExpression]) + + // matchIndexInCdPlanProj remains -1 in the end, it indicates it is + // new cols created out of existing output attribs + val (directlyMappedincomingToCachedPlanIndx, inComingProjNoDirectMapping) = + getDirectAndIndirectMappingOfIncomingToCachedProjectAttribs( + incomingProject, canonicalizedCdProjList) + + // Now there is a possible case where a literal is present in IMR as attribute + // and the incoming project also has that literal somewhere in the alias. Though + // we do not need to read it but looks like the deserializer fails if we skip that + // literal in the projection enforced on IMR. so in effect even if we do not + // require an attribute it still needs to be present in the projection forced + // also its possible that some attribute from IMR can be used in subexpression + // of the incoming projection. 
so we have to handle that
+          val unusedAttribsOfCDPlanToGenIncomingAttr =
+            cdPlanProject.projectList.indices.filterNot(i =>
+              directlyMappedincomingToCachedPlanIndx.exists(_._2 == i)).map(i => {
+              val cdAttrib = cdPlanProject.projectList(i)
+              i -> AttributeReference(cdAttrib.name, cdAttrib.dataType,
+                cdAttrib.nullable, cdAttrib.metadata)(qualifier = cdAttrib.qualifier)
+            })
+
+          // Because in case of rename, multiple incoming named exprs (attributes or aliases)
+          // will point to a common cdPlan attrib, we need to ensure they do not create
+          // separate attributes in the modifiedProject for the incoming plan;
+          // that is, a single attribute ref is present in all mixes of rename and pass-thru
+          // attributes.
+          // So we will use the first attribute ref in the incoming directly mapped project,
+          // or if no attrib exists (only the rename case) we will pick the child expr, which
+          // is bound to be an attribute, as the common ref.
+          val cdAttribToCommonAttribForIncmngNe = directlyMappedincomingToCachedPlanIndx.map {
+            case (inAttribIndex, cdAttribIndex) =>
+              cdPlanProject.projectList(cdAttribIndex).toAttribute ->
+                incomingProject.projectList(inAttribIndex)
+          }.groupBy(_._1).map {
+            case (cdAttr, incomngSeq) =>
+              val incmngCommonAttrib = incomngSeq.map(_._2).flatMap {
+                case attr: Attribute => Seq(attr)
+                case Alias(attr: Attribute, _) => Seq(attr)
+                case _ => Seq.empty
+              }.headOption.getOrElse(
+                AttributeReference(cdAttr.name, cdAttr.dataType, cdAttr.nullable)())
+              cdAttr -> incmngCommonAttrib
+          }
-          // If expressions of inComingProjNoDirectMapping can be expressed in terms of the
-          // incoming attribute refs or incoming alias exprs, which can be mapped directly
-          // to the CachedPlan's output, we are good. so lets transform such indirectly
-          // mappable named expressions in terms of mappable attributes of the incoming plan
-
-          val transformedIndirectlyMappableExpr = inComingProjNoDirectMapping.map {
-            case (incomngIndex, _) =>
-              val indirectIncmnNe = incomingProject.projectList(incomngIndex)
-
-              val modifiedNe = indirectIncmnNe.transformDown {
-                case expr => directlyMappedincomingToCachedPlanIndx.find {
-                  case(incomingIndex, _) =>
-                    val directMappedNe = incomingProject.projectList(incomingIndex)
-                    directMappedNe.toAttribute == expr ||
-                      directMappedNe.children.headOption.contains(expr)}.map {
-                  case (_, cdIndex) =>
-                    val cdAttrib = cdPlanProject.projectList(cdIndex).toAttribute
-                    cdAttribToCommonAttribForIncmngNe(cdAttrib)
-                }.orElse(
-                  unusedAttribsOfCDPlanToGenIncomingAttr.find {
-                    case(i, _) => val cdNe = canonicalizedCdProjList(i)
-                      cdNe.children.headOption.contains(expr)
-                  }.map(_._2)).
- map(ne => Replaceable(ne.toAttribute)).getOrElse(expr) - }.asInstanceOf[NamedExpression] - - incomngIndex -> modifiedNe - }.toMap - - if (transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { - val projectionToForceOnCdPlan = cachedPlan.output.zipWithIndex.map { - case (cdAttr, i) => - cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, - unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) - } - - val modifiedInProj = incomingProject.projectList.zipWithIndex.map { - case (ne, indx) => - directlyMappedincomingToCachedPlanIndx.find(_._1 == indx).map { - case (_, cdIndex) => - ne match { - case attr: Attribute => attr - case al: Alias => - val cdAttr = cdPlanProject.projectList(cdIndex).toAttribute - al.copy(child = cdAttribToCommonAttribForIncmngNe(cdAttr))( - exprId = al.exprId, qualifier = al.qualifier, - explicitMetadata = al.explicitMetadata, - nonInheritableMetadataKeys = al.nonInheritableMetadataKeys - ) - } - }.getOrElse({ - transformedIndirectlyMappableExpr(indx).transformUp { - case Replaceable(attribToUse) => attribToUse - }.asInstanceOf[NamedExpression] - }) - } - val newPartialPlan = Project(modifiedInProj, cd.cachedRepresentation.toOption. - get.withOutput(projectionToForceOnCdPlan)) - partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) - foundMatch = true + // If expressions of inComingProjNoDirectMapping can be expressed in terms of the + // incoming attribute refs or incoming alias exprs, which can be mapped directly + // to the CachedPlan's output, we are good. so lets transform such indirectly + // mappable named expressions in terms of mappable attributes of the incoming plan + val transformedIndirectlyMappableExpr = + transformIndirectlyMappedExpressionsToUseCachedPlanAttributes( + inComingProjNoDirectMapping, incomingProject, cdPlanProject, + directlyMappedincomingToCachedPlanIndx, cdAttribToCommonAttribForIncmngNe, + unusedAttribsOfCDPlanToGenIncomingAttr, canonicalizedCdProjList) + + if (transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { + val projectionToForceOnCdPlan = cachedPlan.output.zipWithIndex.map { + case (cdAttr, i) => + cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, + unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) } + + val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, + directlyMappedincomingToCachedPlanIndx, cdPlanProject, + cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) + + val newPartialPlan = Project(modifiedInProj, cd.cachedRepresentation.toOption. 
+ get.withOutput(projectionToForceOnCdPlan)) + partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) + foundMatch = true } } + } - case _ => - } + case _ => } - partialMatch - }) + } + partialMatch + } + + private def replacementProjectListForIncomingProject( + incomingProject: Project, + directlyMappedincomingToCachedPlanIndx: Seq[(Int, Int)], + cdPlanProject: Project, + cdAttribToCommonAttribForIncmngNe: Map[Attribute, Attribute], + transformedIndirectlyMappableExpr: Map[Int, NamedExpression]): Seq[NamedExpression] = + { + incomingProject.projectList.zipWithIndex.map { + case (ne, indx) => + directlyMappedincomingToCachedPlanIndx.find(_._1 == indx).map { + case (_, cdIndex) => + ne match { + case attr: Attribute => attr + case al: Alias => + val cdAttr = cdPlanProject.projectList(cdIndex).toAttribute + al.copy(child = cdAttribToCommonAttribForIncmngNe(cdAttr))( + exprId = al.exprId, qualifier = al.qualifier, + explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys + ) + } + }.getOrElse({ + transformedIndirectlyMappableExpr(indx).transformUp { + case Replaceable(attribToUse) => attribToUse + }.asInstanceOf[NamedExpression] + }) + } + } + + private def transformIndirectlyMappedExpressionsToUseCachedPlanAttributes( + inComingProjNoDirectMapping: Seq[(Int, Int)], + incomingProject: Project, + cdPlanProject: Project, + directlyMappedincomingToCachedPlanIndx: Seq[(Int, Int)], + cdAttribToCommonAttribForIncmngNe: Map[Attribute, Attribute], + unusedAttribsOfCDPlanToGenIncomingAttr: Seq[(Int, AttributeReference)], + canonicalizedCdProjList: Seq[NamedExpression]): Map[Int, NamedExpression] = + { + inComingProjNoDirectMapping.map { + case (incomngIndex, _) => + val indirectIncmnNe = incomingProject.projectList(incomngIndex) + val modifiedNe = indirectIncmnNe.transformDown { + case expr => directlyMappedincomingToCachedPlanIndx.find { + case (incomingIndex, _) => + val directMappedNe = incomingProject.projectList(incomingIndex) + directMappedNe.toAttribute == expr || + directMappedNe.children.headOption.contains(expr) + }.map { + case (_, cdIndex) => + val cdAttrib = cdPlanProject.projectList(cdIndex).toAttribute + cdAttribToCommonAttribForIncmngNe(cdAttrib) + }.orElse( + unusedAttribsOfCDPlanToGenIncomingAttr.find { + case (i, _) => val cdNe = canonicalizedCdProjList(i) + cdNe.children.headOption.contains(expr) + }.map(_._2)). + map(ne => Replaceable(ne.toAttribute)).getOrElse(expr) + }.asInstanceOf[NamedExpression] + + incomngIndex -> modifiedNe + }.toMap + } + + private def getDirectAndIndirectMappingOfIncomingToCachedProjectAttribs( + incomingProject: Project, + canonicalizedCdProjList: Seq[NamedExpression]): (Seq[(Int, Int)], Seq[(Int, Int)]) = + { + incomingProject.projectList.zipWithIndex.map { + case (inComingNE, index) => + // first check for equivalent named expressions..if index is != -1, that means + // it is pass thru Alias or pass thru - Attribute + var matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere(_ == inComingNE) + if (matchIndexInCdPlanProj == -1) { + // if match index is -1, that means it could be two possibilities: + // 1) it is a case of rename which means the incoming expr is an alias and + // its child is an attrib ref, which may have a direct attribref in the + // cdPlanProj, or it may actually have an alias whose ref matches the ref + // of incoming attribRef + // 2) the positions in the incoming project alias and the cdPlanProject are + // different. 
as a result the canonicalized alias of each would have
+          //    relatively different exprIDs (as their relative positions differ), but
+          //    even in such cases, as their child logical plans are the same, the child
+          //    expression of each alias will have the same canonicalized data
+          val incomingExprToCheck = inComingNE match {
+            case x: AttributeReference => x
+            case Alias(expr, _) => expr
+          }
+          matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere {
+            case Alias(expr, _) => expr == incomingExprToCheck
+            case x => x == incomingExprToCheck
+          }
+        }
+        index -> matchIndexInCdPlanProj
+    }.partition(_._2 != -1)
   }

   /** Replaces segments of the given logical plan with cached versions where possible. */

From 7b4d2fb9f22971502148bd0d68ef94a9ba297b90 Mon Sep 17 00:00:00 2001
From: ashahid
Date: Wed, 3 Jan 2024 16:03:31 -0800
Subject: [PATCH 118/129] SPARK-45959. corrected the golden file

---
 .../resources/sql-tests/analyzer-results/selectExcept.sql.out | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
index 49ea7ed4edcf..7d50d9dfe214 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -126,7 +126,7 @@ SELECT * EXCEPT (data.f1, data.s2) FROM tbl_view
 -- !query analysis
 Project [id#x, name#x, named_struct() AS data#x]
 +- SubqueryAlias tbl_view
-   +- View (`tbl_view`, [id#x,name#x,data#x])
+   +- View (`tbl_view`, [id#x, name#x, data#x])
       +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x]
         +- Project [id#x, name#x, data#x]
           +- SubqueryAlias tbl_view

From 00ce58d4da6ed643c246e118d0b1bdbc9118cdfc Mon Sep 17 00:00:00 2001
From: ashahid
Date: Wed, 3 Jan 2024 23:03:57 -0800
Subject: [PATCH 119/129] SPARK-45959. corrected the golden file. simplified
 early collapse project
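
With the refactor, the collapse entry points now take just the two stacked
Project nodes, since the child and both project lists are derivable from them.
A rough sketch of the rewrite this rule is aiming at (illustrative only: the
column names and the `spark` session below are assumed, not taken from this
patch):

    import org.apache.spark.sql.functions.col

    // Each withColumn normally stacks one more Project over the previous plan.
    val df = spark.range(5).toDF("id")
      .withColumn("a", col("id") + 1)
      .withColumn("b", col("a") * 2)

    // Without early collapse the analyzed plan nests two Projects:
    //   Project [id, a, (a * 2) AS b]
    //   +- Project [id, (id + 1) AS a]
    //      +- Range (0, 5)
    // EarlyCollapseProject merges them while the Dataset is being built, by
    // inlining the aliased child expression:
    //   Project [id, (id + 1) AS a, ((id + 1) * 2) AS b]
    //   +- Range (0, 5)
    // so stacked withColumn calls keep a flat plan, which should also help
    // lookupCachedData keep matching a cached plan after columns are added.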
---
 .../analysis/EarlyCollapseProject.scala       | 108 +++++--------
 .../udf/postgreSQL/udf-join.sql.out           | 151 ++++++++----------
 2 files changed, 112 insertions(+), 147 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala
index 3d6d348fd22d..790851858677 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala
@@ -32,9 +32,8 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] {

   def apply(logicalPlan: LogicalPlan): LogicalPlan =
     logicalPlan match {
-      case newP@Project(newProjList, p@Project(projList, child))
-        if checkEarlyCollapsePossible(p, newP, child) =>
-        collapseProjectEarly(newP, newProjList, p, projList, child) getOrElse newP
+      case newP @ Project(_, p : Project) if checkEarlyCollapsePossible(newP, p) =>
+        collapseProjectEarly(newP, p) getOrElse newP

       case newP@Project(newProjList, f@Filter(_, filterChild: UnaryNode)) =>
         // check if its case of nested filters followed by project
@@ -58,9 +57,8 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] {
             AttributeSet(newProjList.filter(_.isInstanceOf[AttributeReference])
               .map(_.toAttribute)))) {
           val p = projectAtEnd.get
-          val child = p.child
-          if (checkEarlyCollapsePossible(p, newP, child)) {
-            val newProjOpt = collapseProjectEarly(newP, newProjList, p, p.projectList, child)
+          if (checkEarlyCollapsePossible(newP, p)) {
+            val newProjOpt = collapseProjectEarly(newP, p)
             newProjOpt.map(collapsedProj => {
               val lastFilterMod = filterNodes.last.copy(child = collapsedProj)
               filterNodes.dropRight(1).foldRight(lastFilterMod)((f, c) => f.copy(child = c))
@@ -77,10 +75,12 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] {
     case _ => logicalPlan
   }

-  private def checkEarlyCollapsePossible(p: Project, newP: Project, child: LogicalPlan): Boolean =
-    p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty &&
-      newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty &&
-      !child.isInstanceOf[Window]
+  private def checkEarlyCollapsePossible(newP: Project, p: Project): Boolean =
+    newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && !p.child.isInstanceOf[Window] &&
+      p.projectList.forall(_.collectFirst {
+        case ex if !ex.deterministic => ex
+        case ex: UserDefinedExpression => ex
+      }.isEmpty)

   private def transferMetadata(from: Attribute, to: NamedExpression): NamedExpression =
     if (from.metadata == Metadata.empty) {
@@ -99,73 +99,47 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] {
     }
   }

-  def collapseProjectEarly(
-      newP: Project,
-      newProjList: Seq[NamedExpression],
-      p: Project,
-      projList: Seq[NamedExpression],
-      child: LogicalPlan): Option[Project] = {
-    // In the new column list identify those Named Expressions which are just attributes and
-    // hence pass thru
-    val (_, tinkeredOrNewNamedExprs) = newProjList.partition {
-      case _: Attribute => true
-      case _ => false
-    }
-
+  def collapseProjectEarly(newP: Project, p: Project): Option[Project] = {
+    val child = p.child
+    val newProjList = newP.projectList
+    val projList = p.projectList
     val childOutput = child.outputSet
-    val attribsRemappedInProj = AttributeMap(
+    val attribsToExprInProj = AttributeMap(
       projList.flatMap(ne => ne match {
-        case _: AttributeReference => Seq.empty[(Attribute, Expression)]
+        case al@Alias(child, _)
=> child match { + case attr: Attribute if childOutput.contains(attr) => + Seq(al.toAttribute -> (al, transferMetadata(al.toAttribute, attr))) - case al@Alias(attr: AttributeReference, _) => - if (childOutput.contains(attr)) { - Seq(al.toAttribute -> transferMetadata(al.toAttribute, attr)) - } else { - Seq.empty[(Attribute, Expression)] + case _ => Seq(al.toAttribute -> (al, child)) } - case _ => Seq.empty[(Attribute, Expression)] + case _ => Seq.empty[(Attribute, (NamedExpression, Expression))] })) - if ((tinkeredOrNewNamedExprs ++ p.projectList).exists(_.collectFirst { - // we will not flatten if expressions contain windows or aggregate as if they - // are collapsed it can cause recalculation of functions and inefficiency with - // separate group by clauses - case ex if !ex.deterministic => ex - case ex: UserDefinedExpression => ex - }.nonEmpty)) { - None - } else { - val remappedNewProjListResult = Try { - newProjList.map { - case attr: AttributeReference => projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - case al: Alias => - if (attr.name == al.name) { - transferMetadata(attr, al) - } else { - // To Handle the case of change of (Caps/lowercase) via toSchema resulting - // in rename - transferMetadata(attr, al.copy(name = attr.name)( - exprId = al.exprId, qualifier = al.qualifier, - explicitMetadata = al.explicitMetadata, - nonInheritableMetadataKeys = al.nonInheritableMetadataKeys)) - } - - case _: AttributeReference => attr - }.getOrElse(attr) - - case anyOtherExpr => - (anyOtherExpr transformUp { + val remappedNewProjListResult = Try { + newProjList.map { + case attr: AttributeReference => attribsToExprInProj.get(attr).map { + case (al : Alias, _) => if (attr.name == al.name) { + transferMetadata(attr, al) + } else { + // To Handle the case of change of (Caps/lowercase) via toSchema resulting + // in rename + transferMetadata(attr, al.copy(name = attr.name)( + exprId = al.exprId, qualifier = al.qualifier, + explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys)) + } + }.getOrElse(attr) + + case ne => (ne transformUp { case attr: AttributeReference => - attribsRemappedInProj.get(attr).orElse(projList.find( - _.toAttribute.canonicalized == attr.canonicalized).map { - case al: Alias => al.child - case _ => attr - }).getOrElse(attr) + attribsToExprInProj.get(attr).map { + case (_, expr) => expr + }.getOrElse(attr) }).asInstanceOf[NamedExpression] } } + remappedNewProjListResult match { case Success(remappedNewProjList) => val newProject = Project(remappedNewProjList, child) @@ -197,6 +171,6 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { None } } - } + } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index 348aa9535f2c..bda91030bafa 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -415,12 +415,11 @@ SELECT udf('') AS `xxx`, udf(i) AS i, udf(j), udf(t) AS t, udf(k) FROM J1_TBL INNER JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS i#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS t#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - 
+- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -428,12 +427,11 @@ SELECT udf(udf('')) AS `xxx`, udf(i), udf(j) AS j, udf(t), udf(k) AS k FROM J1_TBL JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast(cast(udf(cast( as string)) as string) as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS j#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -460,12 +458,11 @@ SELECT udf(udf('')) AS `xxx`, udf(i), udf(j), udf(t), udf(k) FROM J1_TBL NATURAL JOIN J2_TBL -- !query analysis Project [cast(udf(cast(cast(udf(cast( as string)) as string) as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -473,16 +470,15 @@ SELECT udf('') AS `xxx`, udf(udf(udf(a))) AS a, udf(b), udf(c), udf(d) FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(cast(udf(cast(cast(udf(cast(a#x as string)) as int) as string)) as int) as string)) as int) AS a#x, cast(udf(cast(b#x as string)) as int) AS udf(b)#x, cast(udf(cast(c#x as string)) as string) AS udf(c)#x, cast(udf(cast(d#x as string)) as int) AS udf(d)#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS a#x, k#x AS d#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation 
spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS a#x, k#x AS d#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -490,16 +486,15 @@ SELECT udf('') AS `xxx`, udf(udf(a)), udf(udf(b)), udf(udf(c)) AS c, udf(udf(udf FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(cast(udf(cast(a#x as string)) as int) as string)) as int) AS udf(udf(a))#x, cast(udf(cast(cast(udf(cast(b#x as string)) as int) as string)) as int) AS udf(udf(b))#x, cast(udf(cast(cast(udf(cast(c#x as string)) as string) as string)) as string) AS c#x, cast(udf(cast(cast(udf(cast(cast(udf(cast(d#x as string)) as int) as string)) as int) as string)) as int) AS d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS d#x, k#x AS a#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS d#x, k#x AS a#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -575,12 +570,11 @@ SELECT udf('') AS `xxx`, udf(udf(i)), udf(j), udf(t), udf(k) FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(cast(udf(cast(i#x as string)) as int) as string)) as int) AS udf(udf(i))#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -588,12 +582,11 @@ SELECT udf('') AS `xxx`, udf(i), udf(udf(j)), udf(t), udf(k) FROM J1_TBL RIGHT JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(cast(udf(cast(j#x as string)) as int) as string)) as int) AS udf(udf(j))#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- 
SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -790,17 +783,16 @@ FULL JOIN (SELECT * FROM t3) s3 USING (name) -- !query analysis -Project [cast(udf(cast(name#x as string)) as string) AS udf(name)#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS udf(udf(n))#x, cast(udf(cast(n#x as string)) as int) AS udf(n)#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [cast(udf(cast(coalesce(name#x, name#x) as string)) as string) AS udf(name)#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS udf(udf(n))#x, cast(udf(cast(n#x as string)) as int) AS udf(n)#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -919,25 +911,24 @@ NATURAL FULL JOIN (SELECT name, udf(udf(n)) as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, cast(udf(cast(cast(udf(cast(s1_n#x as string)) as int) as string)) as int) AS udf(udf(s1_n))#x, cast(udf(cast(s2_n#x as string)) as int) AS udf(s2_n)#x, cast(udf(cast(s3_n#x as string)) as int) AS udf(s3_n)#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, cast(udf(cast(cast(udf(cast(s1_n#x as string)) as int) as string)) as int) AS udf(udf(s1_n))#x, cast(udf(cast(s2_n#x as string)) as int) AS udf(s2_n)#x, cast(udf(cast(s3_n#x as string)) as int) AS udf(s3_n)#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation 
spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query From 92ec8aa4a542f4802c037e467104e10809c99927 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 4 Jan 2024 15:55:55 -0800 Subject: [PATCH 120/129] SPARK-45959. corrected the golden file. simplified early collapse project. modified the cache manager to take care of intermediate filters --- .../spark/sql/execution/CacheManager.scala | 78 ++++++++++++++++--- .../analysis/EarlyCollapseProject.scala | 50 +++++++----- .../analyzer-results/natural-join.sql.out | 12 +-- .../analyzer-results/postgreSQL/join.sql.out | 8 +- .../udf/postgreSQL/udf-join.sql.out | 22 +++--- .../udf/udf-natural-join.sql.out | 4 +- 6 files changed, 119 insertions(+), 55 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index d9da49edd751..561fd09a8275 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution +import scala.collection.mutable + import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging @@ -27,7 +29,7 @@ import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, LeafExpression, NamedExpression, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, Project, ResolvedHint, SubqueryAlias, UnaryNode, View} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, IgnoreCachedData, LogicalPlan, Project, ResolvedHint, SubqueryAlias, UnaryNode, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryRelation @@ -40,6 +42,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK import org.apache.spark.util.ArrayImplicits._ + /** Holds a cached logical plan and its data */ case class CachedData(plan: LogicalPlan, cachedRepresentation: Either[LogicalPlan, InMemoryRelation]) @@ -326,7 +329,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { for (cd <- cachedData if !foundMatch) { (plan, cd.plan) match { case (incomingPlan: UnaryNode, cachedPlan: UnaryNode) => - if (incomingPlan.child.sameResult(cachedPlan.child)) { + val (incmngchild, skippedFilters) = extractChildIgnoringFilters(incomingPlan) + if (incmngchild.sameResult(cachedPlan.child)) { if (incomingPlan.getClass == cachedPlan.getClass && incomingPlan.isInstanceOf[Project]) { val incomingProject = incomingPlan.asInstanceOf[Project] @@ -336,7 +340,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { // canonicalize the cachedPlan's project list in terms of the incoming plan's child // so that we can map correctly. 
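          // e.g. if the cached plan's child outputs [a#1, b#2] while the equivalent
          // incoming child outputs [a#10, b#11], the map {a#1 -> a#10, b#2 -> b#11}
          // lets the two project lists be compared attribute by attribute
          // (the expression ids above are invented purely for illustration)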
val cdPlanToIncomngPlanChildOutputMapping = - cdPlanProject.child.output.zip(incomingProject.child.output).toMap + cdPlanProject.child.output.zip(incmngchild.output).toMap val canonicalizedCdProjList = cdPlanProject.projectList.map(_.transformUp { case attr: Attribute => cdPlanToIncomngPlanChildOutputMapping(attr) @@ -402,15 +406,31 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) } - - val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, - directlyMappedincomingToCachedPlanIndx, cdPlanProject, - cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) - - val newPartialPlan = Project(modifiedInProj, cd.cachedRepresentation.toOption. - get.withOutput(projectionToForceOnCdPlan)) - partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) - foundMatch = true + val transformedIntermediateFilters = transformFilters(skippedFilters, + projectionToForceOnCdPlan, canonicalizedCdProjList) + if (transformedIntermediateFilters.forall(_.references.isEmpty)) { + val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, + directlyMappedincomingToCachedPlanIndx, cdPlanProject, + cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) + val actualTransformedFilters = transformedIntermediateFilters.map( + _.transformExpressions { + case Replaceable(attr) => attr + }) + val root = cd.cachedRepresentation.toOption.get.withOutput( + projectionToForceOnCdPlan) + val newPartialPlan = if (actualTransformedFilters.isEmpty) { + Project(modifiedInProj, root) + } else { + val lastFilterNode = actualTransformedFilters.last + val lastFilterMod = lastFilterNode.copy( + child = root) + val filterChain = actualTransformedFilters.dropRight(1).foldRight( + lastFilterMod)((f, c) => f.copy( child = c)) + Project(modifiedInProj, filterChain) + } + partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) + foundMatch = true + } } } } @@ -421,6 +441,40 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { partialMatch } + private def transformFilters(skippedFilters: Seq[Filter], + projectionToForceOnCdPlan: Seq[Attribute], + canonicalizedCdProjList: Seq[NamedExpression]): Seq[Filter] = { + val canonicalizedCdProjAsExpr = canonicalizedCdProjList.map { + case Alias(child, _) => child + case x => x + } + skippedFilters.map(f => { + val transformedCondn = f.condition.transformDown { + case expr => val matchedIndex = canonicalizedCdProjAsExpr.indexWhere(_ == expr) + if (matchedIndex != -1) { + Replaceable(projectionToForceOnCdPlan(matchedIndex)) + } else { + expr + } + } + f.copy(condition = transformedCondn) + }) + } + + private def extractChildIgnoringFilters(incomingPlan: UnaryNode): (LogicalPlan, Seq[Filter]) = { + val collectedFilters = mutable.ListBuffer[Filter]() + var child: LogicalPlan = incomingPlan.child + var keepChecking = true + while(keepChecking) { + child match { + case f: Filter => child = f.child + collectedFilters += f + case _ => keepChecking = false + } + } + (child, collectedFilters.toSeq) + } + private def replacementProjectListForIncomingProject( incomingProject: Project, directlyMappedincomingToCachedPlanIndx: Seq[(Int, Int)], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala index 
790851858677..5bd27b677c37 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import scala.util.{Failure, Success, Try} import org.apache.spark.sql.Dataset -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, UserDefinedExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, Expression, NamedExpression, UserDefinedExpression} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.{Metadata, MetadataBuilder} @@ -29,13 +29,20 @@ import org.apache.spark.util.Utils private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { - + val expressionRemapper: (Expression, AttributeMap[(NamedExpression, Expression)]) => Expression = + (expr, mappings) => { + expr transformUp { + case attr: AttributeReference => mappings.get(attr).map { + case (_, expr) => expr + }.getOrElse(attr) + } + } def apply(logicalPlan: LogicalPlan): LogicalPlan = logicalPlan match { case newP @ Project(_, p : Project) if checkEarlyCollapsePossible(newP, p) => collapseProjectEarly(newP, p) getOrElse newP - case newP@Project(newProjList, f@Filter(_, filterChild: UnaryNode)) => + case newP@Project(_, f@Filter(_, filterChild: UnaryNode)) => // check if its case of nested filters followed by project val filterNodes = mutable.ListBuffer(f) var projectAtEnd: Option[Project] = None @@ -45,23 +52,28 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { currentChild match { case p: Project => projectAtEnd = Option(p) keepGoing = false - case filter@Filter(_, u: UnaryNode) => + case filter @ Filter(expr, u: UnaryNode) if expr.deterministic => filterNodes += filter currentChild = u case _ => keepGoing = false } } - if (projectAtEnd.isDefined && - filterNodes.map(_.condition.references).reduce(_ ++ _). 
- subsetOf( - AttributeSet(newProjList.filter(_.isInstanceOf[AttributeReference]) - .map(_.toAttribute)))) { + if (projectAtEnd.isDefined) { val p = projectAtEnd.get if (checkEarlyCollapsePossible(newP, p)) { val newProjOpt = collapseProjectEarly(newP, p) + val mappingFilterExpr = AttributeMap(p.projectList.flatMap(ne => ne match { + case _: Attribute => Seq.empty[(Attribute, (NamedExpression, Expression))] + case al: Alias => Seq(al.toAttribute -> (al, al.child)) + })) newProjOpt.map(collapsedProj => { - val lastFilterMod = filterNodes.last.copy(child = collapsedProj) - filterNodes.dropRight(1).foldRight(lastFilterMod)((f, c) => f.copy(child = c)) + val lastFilterNode = filterNodes.last + val lastFilterMod = lastFilterNode.copy( + condition = expressionRemapper(lastFilterNode.condition, mappingFilterExpr), + child = collapsedProj.child) + val filterChain = filterNodes.dropRight(1).foldRight(lastFilterMod)((f, c) => + f.copy(condition = expressionRemapper(f.condition, mappingFilterExpr), child = c)) + collapsedProj.copy(child = filterChain) }).getOrElse { newP } @@ -76,7 +88,9 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { } private def checkEarlyCollapsePossible(newP: Project, p: Project): Boolean = - newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && !p.child.isInstanceOf[Window] && + newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + !p.child.isInstanceOf[Window] && p.projectList.forall(_.collectFirst { case ex if !ex.deterministic => ex case ex: UserDefinedExpression => ex @@ -99,6 +113,8 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { } } + + def collapseProjectEarly(newP: Project, p: Project): Option[Project] = { val child = p.child val newProjList = newP.projectList @@ -131,14 +147,9 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { } }.getOrElse(attr) - case ne => (ne transformUp { - case attr: AttributeReference => - attribsToExprInProj.get(attr).map { - case (_, expr) => expr - }.getOrElse(attr) - }).asInstanceOf[NamedExpression] - } + case ne => expressionRemapper(ne, attribsToExprInProj).asInstanceOf[NamedExpression] } + } remappedNewProjListResult match { case Success(remappedNewProjList) => @@ -174,3 +185,4 @@ private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { } } + diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out index 8fab5e05dac7..91178c8c1042 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out @@ -89,8 +89,8 @@ Project [k#x, v1#x, v2#x] -- !query SELECT * FROM nt1 natural join nt2 where k = "one" -- !query analysis -Filter (k#x = one) -+- Project [k#x, v1#x, v2#x] +Project [k#x, v1#x, v2#x] ++- Filter (k#x = one) +- Join Inner, (k#x = k#x) :- SubqueryAlias nt1 : +- View (`nt1`, [k#x, v1#x]) @@ -190,8 +190,8 @@ Project [k#x] -- !query SELECT k FROM nt1 natural join nt2 where k = "one" -- !query analysis -Filter (k#x = one) -+- Project [k#x] +Project [k#x] ++- Filter (k#x = one) +- Join Inner, (k#x = k#x) :- SubqueryAlias nt1 : +- View (`nt1`, [k#x, v1#x]) @@ -392,8 +392,8 @@ Sort [key#x ASC NULLS FIRST], true -- !query SELECT nt1.k, nt2.k FROM nt1 natural join nt2 where k = "one" -- !query analysis -Filter (k#x = one) -+- Project [k#x, k#x] +Project [k#x, k#x] ++- Filter (k#x = one) +- Join 
Inner, (k#x = k#x) :- SubqueryAlias nt1 : +- View (`nt1`, [k#x, v1#x]) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index bce1759a37cb..f5188c21aa7d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -621,8 +621,8 @@ Sort [i#x ASC NULLS FIRST, k#x ASC NULLS FIRST, t#x ASC NULLS FIRST], true SELECT '' AS `xxx`, * FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (k = 1) -- !query analysis -Filter (k#x = 1) -+- Project [ AS xxx#x, i#x, j#x, t#x, k#x] +Project [ AS xxx#x, i#x, j#x, t#x, k#x] ++- Filter (k#x = 1) +- Join LeftOuter, (i#x = i#x) :- SubqueryAlias spark_catalog.default.j1_tbl : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet @@ -634,8 +634,8 @@ Filter (k#x = 1) SELECT '' AS `xxx`, * FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1) -- !query analysis -Filter (i#x = 1) -+- Project [ AS xxx#x, i#x, j#x, t#x, k#x] +Project [ AS xxx#x, i#x, j#x, t#x, k#x] ++- Filter (i#x = 1) +- Join LeftOuter, (i#x = i#x) :- SubqueryAlias spark_catalog.default.j1_tbl : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index bda91030bafa..7a505fe4fc10 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -627,12 +627,11 @@ SELECT udf('') AS `xxx`, udf(i), udf(j), udf(t), udf(udf(k)) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(cast(udf(cast(k#x as string)) as int) as string)) as int) AS udf(udf(k))#x] +- Filter (cast(udf(cast(k#x as string)) as int) = 1) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet + +- Join LeftOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -641,12 +640,11 @@ SELECT udf('') AS `xxx`, udf(i), udf(j), udf(t), udf(k) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] +- Filter (cast(udf(cast(cast(udf(cast(i#x as string)) as int) as string)) as int) = cast(udf(cast(1 as string)) as int)) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet + +- Join LeftOuter, (i#x = i#x) + :- 
SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out index d80507eb5d8d..75e48c67f752 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out @@ -36,8 +36,8 @@ CreateViewCommand `nt2`, select * from values -- !query SELECT * FROM nt1 natural join nt2 where udf(k) = "one" -- !query analysis -Filter (cast(udf(cast(k#x as string)) as string) = one) -+- Project [k#x, v1#x, v2#x] +Project [k#x, v1#x, v2#x] ++- Filter (cast(udf(cast(k#x as string)) as string) = one) +- Join Inner, (k#x = k#x) :- SubqueryAlias nt1 : +- View (`nt1`, [k#x, v1#x]) From 45b03f35b1f0052801cda7d938c6c6d2c71db8d7 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 5 Jan 2024 09:46:43 -0800 Subject: [PATCH 121/129] SPARK-45959.refactored CacheManager code to simplify. added a new test --- .../spark/sql/execution/CacheManager.scala | 47 +++++++------------ .../spark/sql/EarlyCollapseProjectSuite.scala | 9 ++++ 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 561fd09a8275..1101d1281290 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -24,10 +24,8 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, LeafExpression, NamedExpression, SubqueryExpression} -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, NamedExpression, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint import org.apache.spark.sql.catalyst.plans.logical.{Filter, IgnoreCachedData, LogicalPlan, Project, ResolvedHint, SubqueryAlias, UnaryNode, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION @@ -37,7 +35,6 @@ import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.DataType import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK import org.apache.spark.util.ArrayImplicits._ @@ -400,31 +397,29 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { directlyMappedincomingToCachedPlanIndx, cdAttribToCommonAttribForIncmngNe, unusedAttribsOfCDPlanToGenIncomingAttr, canonicalizedCdProjList) - if 
(transformedIndirectlyMappableExpr.forall(_._2.references.isEmpty)) { - val projectionToForceOnCdPlan = cachedPlan.output.zipWithIndex.map { - case (cdAttr, i) => - cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, - unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) - } + val projectionToForceOnCdPlan = cachedPlan.output.zipWithIndex.map { + case (cdAttr, i) => + cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, + unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) + } + val forcedAttribset = AttributeSet(projectionToForceOnCdPlan) + if (transformedIndirectlyMappableExpr.forall( + _._2.references.subsetOf(forcedAttribset))) { val transformedIntermediateFilters = transformFilters(skippedFilters, projectionToForceOnCdPlan, canonicalizedCdProjList) - if (transformedIntermediateFilters.forall(_.references.isEmpty)) { + if (transformedIntermediateFilters.forall(_.references.subsetOf(forcedAttribset))) { val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, directlyMappedincomingToCachedPlanIndx, cdPlanProject, cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) - val actualTransformedFilters = transformedIntermediateFilters.map( - _.transformExpressions { - case Replaceable(attr) => attr - }) val root = cd.cachedRepresentation.toOption.get.withOutput( projectionToForceOnCdPlan) - val newPartialPlan = if (actualTransformedFilters.isEmpty) { + val newPartialPlan = if (transformedIntermediateFilters.isEmpty) { Project(modifiedInProj, root) } else { - val lastFilterNode = actualTransformedFilters.last + val lastFilterNode = transformedIntermediateFilters.last val lastFilterMod = lastFilterNode.copy( child = root) - val filterChain = actualTransformedFilters.dropRight(1).foldRight( + val filterChain = transformedIntermediateFilters.dropRight(1).foldRight( lastFilterMod)((f, c) => f.copy( child = c)) Project(modifiedInProj, filterChain) } @@ -452,7 +447,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val transformedCondn = f.condition.transformDown { case expr => val matchedIndex = canonicalizedCdProjAsExpr.indexWhere(_ == expr) if (matchedIndex != -1) { - Replaceable(projectionToForceOnCdPlan(matchedIndex)) + projectionToForceOnCdPlan(matchedIndex) } else { expr } @@ -497,9 +492,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { ) } }.getOrElse({ - transformedIndirectlyMappableExpr(indx).transformUp { - case Replaceable(attribToUse) => attribToUse - }.asInstanceOf[NamedExpression] + transformedIndirectlyMappableExpr(indx) }) } } @@ -531,7 +524,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { case (i, _) => val cdNe = canonicalizedCdProjList(i) cdNe.children.headOption.contains(expr) }.map(_._2)). 
- map(ne => Replaceable(ne.toAttribute)).getOrElse(expr) + map(ne => ne.toAttribute).getOrElse(expr) }.asInstanceOf[NamedExpression] incomngIndex -> modifiedNe @@ -687,14 +680,6 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } } -private case class Replaceable(attribToUse: Attribute) extends LeafExpression { - override def nullable: Boolean = false - override def eval(input: InternalRow): Any = throw new UnsupportedOperationException() - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - throw new UnsupportedOperationException() - override def dataType: DataType = attribToUse.dataType -} - object CacheManager { val inMemoryRelationExtractor: LogicalPlan => InMemoryRelation = plan => plan.collectLeaves().head.asInstanceOf[InMemoryRelation] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 3eb325b5a071..08cbcbf920fe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -194,6 +194,15 @@ class EarlyCollapseProjectSuite extends QueryTest checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed( Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1"))) } + + test("early collapse of filter chain with project ") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b") + + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"a" > 4). + filter($"c" * $"b" < 60). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")) + } test("resurrection of intermediate dropped cols when used in filter") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). From d3b7a2fe16db7315b165f2e5c2dfb43a83aa0a44 Mon Sep 17 00:00:00 2001 From: ashahid Date: Fri, 5 Jan 2024 19:20:18 -0800 Subject: [PATCH 122/129] SPARK-45959.refactored CacheManager code. Handled the case of InMemoryRelation not being used due to filter chain case not handled. 
added new tests --- .../spark/sql/execution/CacheManager.scala | 305 +++++++++++------- .../spark/sql/EarlyCollapseProjectSuite.scala | 12 +- 2 files changed, 195 insertions(+), 122 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 1101d1281290..4e09a2b96cf6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -25,11 +25,12 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, NamedExpression, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint -import org.apache.spark.sql.catalyst.plans.logical.{Filter, IgnoreCachedData, LogicalPlan, Project, ResolvedHint, SubqueryAlias, UnaryNode, View} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, IgnoreCachedData, LeafNode, LogicalPlan, Project, ResolvedHint, SubqueryAlias, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.analysis.EarlyCollapseProject import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} @@ -317,119 +318,113 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { /** Optionally returns cached data for the given [[LogicalPlan]]. */ def lookupCachedData(plan: LogicalPlan): Option[CachedData] = { val fullMatch = cachedData.find(cd => plan.sameResult(cd.plan)) - fullMatch.map(Option(_)).getOrElse(lookUpPartiallyMatchedCachedPlan(plan)) + fullMatch.map(Option(_)).getOrElse( + plan match { + case p: Project => lookUpPartiallyMatchedCachedPlan(p) + case _ => None + }) } - private def lookUpPartiallyMatchedCachedPlan(plan: LogicalPlan): Option[CachedData] = { + private def lookUpPartiallyMatchedCachedPlan(incomingProject: Project): Option[CachedData] = { var foundMatch = false var partialMatch: Option[CachedData] = None + val (incmngchild, incomingFilterChain) = + CompatibilityChecker.extractChildIgnoringFiltersFromIncomingProject(incomingProject) for (cd <- cachedData if !foundMatch) { - (plan, cd.plan) match { - case (incomingPlan: UnaryNode, cachedPlan: UnaryNode) => - val (incmngchild, skippedFilters) = extractChildIgnoringFilters(incomingPlan) - if (incmngchild.sameResult(cachedPlan.child)) { - if (incomingPlan.getClass == cachedPlan.getClass && - incomingPlan.isInstanceOf[Project]) { - val incomingProject = incomingPlan.asInstanceOf[Project] - val cdPlanProject = cachedPlan.asInstanceOf[Project] - // since the child of both incoming and cached plan are same - // that is why we are here. for mapping and comparison purposes lets - // canonicalize the cachedPlan's project list in terms of the incoming plan's child - // so that we can map correctly. 
- val cdPlanToIncomngPlanChildOutputMapping = - cdPlanProject.child.output.zip(incmngchild.output).toMap - - val canonicalizedCdProjList = cdPlanProject.projectList.map(_.transformUp { - case attr: Attribute => cdPlanToIncomngPlanChildOutputMapping(attr) - }.asInstanceOf[NamedExpression]) - - // matchIndexInCdPlanProj remains -1 in the end, it indicates it is - // new cols created out of existing output attribs - val (directlyMappedincomingToCachedPlanIndx, inComingProjNoDirectMapping) = - getDirectAndIndirectMappingOfIncomingToCachedProjectAttribs( - incomingProject, canonicalizedCdProjList) - - // Now there is a possible case where a literal is present in IMR as attribute - // and the incoming project also has that literal somewhere in the alias. Though - // we do not need to read it but looks like the deserializer fails if we skip that - // literal in the projection enforced on IMR. so in effect even if we do not - // require an attribute it still needs to be present in the projection forced - // also its possible that some attribute from IMR can be used in subexpression - // of the incoming projection. so we have to handle that - val unusedAttribsOfCDPlanToGenIncomingAttr = - cdPlanProject.projectList.indices.filterNot(i => - directlyMappedincomingToCachedPlanIndx.exists(_._2 == i)).map(i => { - val cdAttrib = cdPlanProject.projectList(i) - i -> AttributeReference(cdAttrib.name, cdAttrib.dataType, - cdAttrib.nullable, cdAttrib.metadata)(qualifier = cdAttrib.qualifier) - }) - - // Because in case of rename multiple incmong named exprs ( attribute or aliases) - // will point to a common cdplan attrib, we need to ensure they do not create - // separate attribute in the the modifiedProject for incoming plan.. - // that is a single attribute ref is present in all mixes of rename and pass thru - // attributes. - // so we will use the first attribute ref in the incoming directly mapped project - // or if no attrib exists ( only case of rename) we will pick the child expr which - // is bound to be an attribute as the common ref. - val cdAttribToCommonAttribForIncmngNe = directlyMappedincomingToCachedPlanIndx.map { - case (inAttribIndex, cdAttribIndex) => - cdPlanProject.projectList(cdAttribIndex).toAttribute -> - incomingProject.projectList(inAttribIndex) - }.groupBy(_._1).map { - case (cdAttr, incomngSeq) => - val incmngCommonAttrib = incomngSeq.map(_._2).flatMap { - case attr: Attribute => Seq(attr) - case Alias(attr: Attribute, _) => Seq(attr) - case _ => Seq.empty - }.headOption.getOrElse( - AttributeReference(cdAttr.name, cdAttr.dataType, cdAttr.nullable)()) - cdAttr -> incmngCommonAttrib - } + (incmngchild, incomingFilterChain, cd.plan) match { + case CompatibilityChecker(residualIncomingFilterChain, cdPlanProject) => + // since the child of both incoming and cached plan are same + // that is why we are here. for mapping and comparison purposes lets + // canonicalize the cachedPlan's project list in terms of the incoming plan's child + // so that we can map correctly. 
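+          // note: incmngchild is the incoming project's child with any intervening
+          // Filter nodes already stripped off by the extractor, so zipping the two
+          // children's outputs below pairs the attributes up positionally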
+ val cdPlanToIncomngPlanChildOutputMapping = + cdPlanProject.child.output.zip(incmngchild.output).toMap + + val canonicalizedCdProjList = cdPlanProject.projectList.map(_.transformUp { + case attr: Attribute => cdPlanToIncomngPlanChildOutputMapping(attr) + }.asInstanceOf[NamedExpression]) + + // matchIndexInCdPlanProj remains -1 in the end, it indicates it is + // new cols created out of existing output attribs + val (directlyMappedincomingToCachedPlanIndx, inComingProjNoDirectMapping) = + getDirectAndIndirectMappingOfIncomingToCachedProjectAttribs( + incomingProject, canonicalizedCdProjList) + + // Now there is a possible case where a literal is present in IMR as attribute + // and the incoming project also has that literal somewhere in the alias. Though + // we do not need to read it but looks like the deserializer fails if we skip that + // literal in the projection enforced on IMR. so in effect even if we do not + // require an attribute it still needs to be present in the projection forced + // also its possible that some attribute from IMR can be used in subexpression + // of the incoming projection. so we have to handle that + val unusedAttribsOfCDPlanToGenIncomingAttr = + cdPlanProject.projectList.indices.filterNot(i => + directlyMappedincomingToCachedPlanIndx.exists(_._2 == i)).map(i => { + val cdAttrib = cdPlanProject.projectList(i) + i -> AttributeReference(cdAttrib.name, cdAttrib.dataType, + cdAttrib.nullable, cdAttrib.metadata)(qualifier = cdAttrib.qualifier) + }) + + // Because in case of rename multiple incmong named exprs ( attribute or aliases) + // will point to a common cdplan attrib, we need to ensure they do not create + // separate attribute in the the modifiedProject for incoming plan.. + // that is a single attribute ref is present in all mixes of rename and pass thru + // attributes. + // so we will use the first attribute ref in the incoming directly mapped project + // or if no attrib exists ( only case of rename) we will pick the child expr which + // is bound to be an attribute as the common ref. + val cdAttribToCommonAttribForIncmngNe = directlyMappedincomingToCachedPlanIndx.map { + case (inAttribIndex, cdAttribIndex) => + cdPlanProject.projectList(cdAttribIndex).toAttribute -> + incomingProject.projectList(inAttribIndex) + }.groupBy(_._1).map { + case (cdAttr, incomngSeq) => + val incmngCommonAttrib = incomngSeq.map(_._2).flatMap { + case attr: Attribute => Seq(attr) + case Alias(attr: Attribute, _) => Seq(attr) + case _ => Seq.empty + }.headOption.getOrElse( + AttributeReference(cdAttr.name, cdAttr.dataType, cdAttr.nullable)()) + cdAttr -> incmngCommonAttrib + } - // If expressions of inComingProjNoDirectMapping can be expressed in terms of the - // incoming attribute refs or incoming alias exprs, which can be mapped directly - // to the CachedPlan's output, we are good. 
so lets transform such indirectly - // mappable named expressions in terms of mappable attributes of the incoming plan - val transformedIndirectlyMappableExpr = - transformIndirectlyMappedExpressionsToUseCachedPlanAttributes( - inComingProjNoDirectMapping, incomingProject, cdPlanProject, - directlyMappedincomingToCachedPlanIndx, cdAttribToCommonAttribForIncmngNe, - unusedAttribsOfCDPlanToGenIncomingAttr, canonicalizedCdProjList) - - val projectionToForceOnCdPlan = cachedPlan.output.zipWithIndex.map { - case (cdAttr, i) => - cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, - unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) - } - val forcedAttribset = AttributeSet(projectionToForceOnCdPlan) - if (transformedIndirectlyMappableExpr.forall( - _._2.references.subsetOf(forcedAttribset))) { - val transformedIntermediateFilters = transformFilters(skippedFilters, - projectionToForceOnCdPlan, canonicalizedCdProjList) - if (transformedIntermediateFilters.forall(_.references.subsetOf(forcedAttribset))) { - val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, - directlyMappedincomingToCachedPlanIndx, cdPlanProject, - cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) - val root = cd.cachedRepresentation.toOption.get.withOutput( - projectionToForceOnCdPlan) - val newPartialPlan = if (transformedIntermediateFilters.isEmpty) { - Project(modifiedInProj, root) - } else { - val lastFilterNode = transformedIntermediateFilters.last - val lastFilterMod = lastFilterNode.copy( - child = root) - val filterChain = transformedIntermediateFilters.dropRight(1).foldRight( - lastFilterMod)((f, c) => f.copy( child = c)) - Project(modifiedInProj, filterChain) - } - partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) - foundMatch = true - } + // If expressions of inComingProjNoDirectMapping can be expressed in terms of the + // incoming attribute refs or incoming alias exprs, which can be mapped directly + // to the CachedPlan's output, we are good. 
so lets transform such indirectly + // mappable named expressions in terms of mappable attributes of the incoming plan + val transformedIndirectlyMappableExpr = + transformIndirectlyMappedExpressionsToUseCachedPlanAttributes( + inComingProjNoDirectMapping, incomingProject, cdPlanProject, + directlyMappedincomingToCachedPlanIndx, cdAttribToCommonAttribForIncmngNe, + unusedAttribsOfCDPlanToGenIncomingAttr, canonicalizedCdProjList) + + val projectionToForceOnCdPlan = cdPlanProject.output.zipWithIndex.map { + case (cdAttr, i) => + cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, + unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) + } + val forcedAttribset = AttributeSet(projectionToForceOnCdPlan) + if (transformedIndirectlyMappableExpr.forall( + _._2.references.subsetOf(forcedAttribset))) { + val transformedIntermediateFilters = transformFilters(residualIncomingFilterChain, + projectionToForceOnCdPlan, canonicalizedCdProjList) + if (transformedIntermediateFilters.forall(_.references.subsetOf(forcedAttribset))) { + val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, + directlyMappedincomingToCachedPlanIndx, cdPlanProject, + cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) + val root = cd.cachedRepresentation.toOption.get.withOutput( + projectionToForceOnCdPlan) + val newPartialPlan = if (transformedIntermediateFilters.isEmpty) { + Project(modifiedInProj, root) + } else { + val chainedFilter = CompatibilityChecker.combineFilterChainUsingRoot( + transformedIntermediateFilters, root) + Project(modifiedInProj, chainedFilter) } + partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) + foundMatch = true } } - case _ => } } @@ -456,20 +451,6 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { }) } - private def extractChildIgnoringFilters(incomingPlan: UnaryNode): (LogicalPlan, Seq[Filter]) = { - val collectedFilters = mutable.ListBuffer[Filter]() - var child: LogicalPlan = incomingPlan.child - var keepChecking = true - while(keepChecking) { - child match { - case f: Filter => child = f.child - collectedFilters += f - case _ => keepChecking = false - } - } - (child, collectedFilters.toSeq) - } - private def replacementProjectListForIncomingProject( incomingProject: Project, directlyMappedincomingToCachedPlanIndx: Seq[(Int, Int)], @@ -684,3 +665,87 @@ object CacheManager { val inMemoryRelationExtractor: LogicalPlan => InMemoryRelation = plan => plan.collectLeaves().head.asInstanceOf[InMemoryRelation] } + +object CompatibilityChecker { + def unapply(data: (LogicalPlan, Seq[Filter], LogicalPlan)): Option[(Seq[Filter], Project)] = { + val(incomingChild, incomingFilterChain, cachedPlan) = data + cachedPlan match { + case p: Project if incomingChild.sameResult(p.child) => Option(incomingFilterChain -> p) + + case f: Filter => + val collectedFilters = mutable.ListBuffer[Filter](f) + var projectFound: Option[Project] = None + var child: LogicalPlan = f.child + var keepChecking = true + while (keepChecking) { + child match { + case x: Filter => child = x.child + collectedFilters += x + case p: Project => projectFound = Option(p) + keepChecking = false + case _ => keepChecking = false + } + } + if (collectedFilters.size <= incomingFilterChain.size && + projectFound.exists(_.child.sameResult(incomingChild))) { + val (residualIncomingFilterChain, otherFilterChain) = incomingFilterChain.splitAt( + incomingFilterChain.size - collectedFilters.size) + val isCompatible = if (otherFilterChain.isEmpty) { + true 
+ } else { + // the other filter chain must be equal to the collected filter chain + // But we need to transform the collected Filter chain such that it is below + // the project of the cached plan, we have found, as the incoming filters are also below + // the incoming project. + val mappingFilterExpr = AttributeMap(projectFound.get.projectList.flatMap { + case _: Attribute => Seq.empty[(Attribute, (NamedExpression, Expression))] + case al: Alias => Seq(al.toAttribute -> (al, al.child)) + }) + + val modifiedCdFilters = collectedFilters.map(f => + f.copy(condition = EarlyCollapseProject.expressionRemapper( + f.condition, mappingFilterExpr))).toSeq + val chainedFilter1 = combineFilterChainUsingRoot(otherFilterChain, + EmptyRelation(incomingChild.output)) + val chainedFilter2 = combineFilterChainUsingRoot(modifiedCdFilters, + EmptyRelation(projectFound.map(_.child).get.output)) + chainedFilter1.sameResult(chainedFilter2) + } + if (isCompatible) { + Option(residualIncomingFilterChain -> projectFound.get) + } else { + None + } + } else { + None + } + + case _ => None + } + } + + def combineFilterChainUsingRoot(filters: Seq[Filter], root: LogicalPlan): Filter = { + val lastFilterNode = filters.last + val lastFilterMod = lastFilterNode.copy(child = root) + filters.dropRight(1).foldRight(lastFilterMod)((f, c) => f.copy(child = c)) + } + + def extractChildIgnoringFiltersFromIncomingProject(incomingProject: Project): + (LogicalPlan, Seq[Filter]) = { + val collectedFilters = mutable.ListBuffer[Filter]() + var child: LogicalPlan = incomingProject.child + var keepChecking = true + while (keepChecking) { + child match { + case f: Filter => child = f.child + collectedFilters += f + case _ => keepChecking = false + } + } + (child, collectedFilters.toSeq) + } + + case class EmptyRelation(output: Seq[Attribute]) extends LeafNode { + override def maxRows: Option[Long] = Some(0) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 08cbcbf920fe..c7f29b271c25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -194,8 +194,8 @@ class EarlyCollapseProjectSuite extends QueryTest checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed( Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1"))) } - - test("early collapse of filter chain with project ") { + + test("early collapse of filter chain with project - 1") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b") @@ -204,6 +204,14 @@ class EarlyCollapseProjectSuite extends QueryTest select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")) } + test("early collapse of filter chain with project - 2") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4).filter($"c" * $"b" < 60) + + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")) + } + test("resurrection of intermediate dropped cols when used in filter") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"b").select($"c", $"b", $"c" + 7 as "d") From 2fb1e75d9a4960b2dc30c93799c4fd71234281c8 Mon Sep 17 00:00:00 2001 From: ashahid Date: Tue, 9 Jan 2024 12:54:05 -0800 Subject: [PATCH 123/129] SPARK-45959. added tests to validate used of nested InMemoryRelations use --- .../spark/sql/EarlyCollapseProjectSuite.scala | 10 ++--- ...EarlyCollapseProjectWithCachingSuite.scala | 42 +++++++++++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index c7f29b271c25..4a9e9aef1162 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -196,7 +196,7 @@ class EarlyCollapseProjectSuite extends QueryTest } test("early collapse of filter chain with project - 1") { - val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b") checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"a" > 4). @@ -205,7 +205,7 @@ class EarlyCollapseProjectSuite extends QueryTest } test("early collapse of filter chain with project - 2") { - val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4).filter($"c" * $"b" < 60) checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"b" < 100). @@ -213,7 +213,7 @@ class EarlyCollapseProjectSuite extends QueryTest } test("resurrection of intermediate dropped cols when used in filter") { - val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"b").select($"c", $"b", $"c" + 7 as "d") // A dropped column would result in a new project being added on top of filter // so we have to take into account of that extra project added while checking @@ -223,7 +223,7 @@ class EarlyCollapseProjectSuite extends QueryTest } test("resurrection of right renamed intermediate dropped cols when used in filter") { - val baseDfCreator = () => spark.range(10).select($"id" + 7 as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(100).select($"id" + 7 as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"b", $"a" * $"b" as "a").select($"c", $"b", $"c" + 7 as "d") // A dropped column would result in a new project being added on top of filter // so we have to take into account of that extra project added while checking @@ -232,7 +232,7 @@ class EarlyCollapseProjectSuite extends QueryTest Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).select($"c1", $"d1").filter($"a" > 25)) } - private def checkProjectCollapseAndCacheUse( + protected def checkProjectCollapseAndCacheUse( baseDfCreator: () => DataFrame, testExec: DataFrame => DataFrame): Unit = { val baseDf = baseDfCreator() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala index 218bf11bf7d8..f24f0003981e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala @@ -17,6 +17,48 @@ package org.apache.spark.sql +import org.apache.spark.sql.execution.analysis.EarlyCollapseProject +import org.apache.spark.sql.execution.columnar.InMemoryRelation +import org.apache.spark.sql.internal.SQLConf + class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite { + import testImplicits._ override val useCaching: Boolean = true + + test("check for nested InMemoryRelations") { + val baseDfCreator = () => spark.range(1000).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4).filter($"c" * $"b" < 60) + + checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")) + + // there is already a cached base Df + val df1 = baseDfCreator().filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + df1.cache() + + val df2 = baseDfCreator().filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"). + select($"c" * $"a" as "c", $"c" * $"b" as "a", $"e").filter($"c" > 73). + filter($"d" < 300) + val rows = df2.collect() + assert(rows.length > 0) + // there should be 2 nested In Memory Relations + val optimizedPlan = df2.queryExecution.optimizedPlan + val leaf1 = optimizedPlan.collectLeaves().head + assert(leaf1.isInstanceOf[InMemoryRelation]) + val imr1 = leaf1.asInstanceOf[InMemoryRelation] + val leaf2 = imr1.queryExecution.optimizedPlan.collectLeaves().head + assert(leaf2.isInstanceOf[InMemoryRelation]) + df1.unpersist() + baseDfCreator().unpersist() + val fullyUnopt = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + baseDfCreator().filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"). + select($"c" * $"a" as "c", $"c" * $"b" as "a", $"e").filter($"c" > 73). + filter($"d" < 300) + } + checkAnswer(fullyUnopt, rows) + } } From 5470c86ae995dbe28dfedac34fd60b2f0c33622a Mon Sep 17 00:00:00 2001 From: ashahid Date: Wed, 27 Mar 2024 12:42:51 -0700 Subject: [PATCH 124/129] SPARK-45959. 
Fixed a test corresponding to new issue SPARK-47609
---
 .../apache/spark/sql/DatasetCacheSuite.scala  | 20 +++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala
index bda8c7f26082..83b38ef3d343 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql

 import org.scalatest.concurrent.TimeLimits
 import org.scalatest.time.SpanSugar._
-
+import org.apache.spark.sql.execution.{ProjectExec, UnaryExecNode}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec}
 import org.apache.spark.sql.functions._
@@ -244,15 +244,27 @@ class DatasetCacheSuite extends QueryTest
       case i: InMemoryRelation => i.cacheBuilder.cachedPlan
     }
     assert(df1LimitInnerPlan.isDefined && df1LimitInnerPlan.get == df1InnerPlan)
-
-    // Verify that df2's cache has been re-cached, with a new physical plan rid of dependency
-    // on df, since df2's cache had not been loaded before df.unpersist().
     val df2Limit = df2.limit(2)
     val df2LimitInnerPlan = df2Limit.queryExecution.withCachedData.collectFirst {
       case i: InMemoryRelation => i.cacheBuilder.cachedPlan
     }
+    // The assertion below is incorrect in the context of bug SPARK-47609,
+    // as df2 is derivable from df1 (which is an InMemoryRelation).
+
+    /*
+    // Verify that df2's cache has been re-cached, with a new physical plan rid of dependency
+    // on df, since df2's cache had not been loaded before df.unpersist().
    assert(df2LimitInnerPlan.isDefined &&
      !df2LimitInnerPlan.get.exists(_.isInstanceOf[InMemoryTableScanExec]))
+    */
+    assert(df2LimitInnerPlan.isDefined)
+    val innerImr = df2LimitInnerPlan.get.collectFirst {
+      case imrExec: InMemoryTableScanExec => imrExec.relation
+    }
+    assert(innerImr.isDefined)
+    assert(innerImr.get.cacheBuilder.cachedPlan.asInstanceOf[UnaryExecNode].
+      child.isInstanceOf[ProjectExec])
+
   }

   test("SPARK-27739 Save stats from optimized plan") {

From c5c7b9979e0d154daa753fcd828678563f3626e0 Mon Sep 17 00:00:00 2001
From: ashahid
Date: Wed, 27 Mar 2024 13:45:44 -0700
Subject: [PATCH 125/129] SPARK-45959. Fixed scalastyle issue

---
 .../src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala
index 83b38ef3d343..09dd9eb95d84 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql

 import org.scalatest.concurrent.TimeLimits
 import org.scalatest.time.SpanSugar._
+
 import org.apache.spark.sql.execution.{ProjectExec, UnaryExecNode}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec}
@@ -264,7 +265,6 @@
     assert(innerImr.isDefined)
     assert(innerImr.get.cacheBuilder.cachedPlan.asInstanceOf[UnaryExecNode].
child.isInstanceOf[ProjectExec]) - } test("SPARK-27739 Save stats from optimized plan") { From 2f7dbda708942c9943ef94b9a7f3da740f7f950c Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 28 Mar 2024 14:29:46 -0700 Subject: [PATCH 126/129] SPARK-45959. added tests for verifying plans uncaching is happening correctly. Fixed the PR related to the uncaching issues found in testing --- .../spark/sql/execution/CacheManager.scala | 59 +++++++++++++------ ...EarlyCollapseProjectWithCachingSuite.scala | 27 ++++++++- 2 files changed, 66 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index cf59cc885143..372f52327590 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -154,18 +154,30 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { plan: LogicalPlan, cascade: Boolean, blocking: Boolean = false): Unit = { - uncacheQuery(spark, _.sameResult(plan), cascade, blocking) + val dummyCd = CachedData(plan, Left(plan)) + uncacheQuery(spark, + (planToCheck: LogicalPlan, partialMatchOk: Boolean) => { + dummyCd.plan.sameResult(planToCheck) || (partialMatchOk && + (planToCheck match { + case p: Project => lookUpPartiallyMatchedCachedPlan(p, IndexedSeq(dummyCd)).isDefined + case _ => false + })) + }, cascade, blocking) } def uncacheTableOrView(spark: SparkSession, name: Seq[String], cascade: Boolean): Unit = { uncacheQuery( spark, - isMatchedTableOrView(_, name, spark.sessionState.conf), + isMatchedTableOrView(_, _, name, spark.sessionState.conf), cascade, blocking = false) } - private def isMatchedTableOrView(plan: LogicalPlan, name: Seq[String], conf: SQLConf): Boolean = { + private def isMatchedTableOrView( + plan: LogicalPlan, + partialMatch: Boolean, + name: Seq[String], + conf: SQLConf): Boolean = { def isSameName(nameInCache: Seq[String]): Boolean = { nameInCache.length == name.length && nameInCache.zip(name).forall(conf.resolver.tupled) } @@ -194,14 +206,14 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { def uncacheQuery( spark: SparkSession, - isMatchedPlan: LogicalPlan => Boolean, + isMatchedPlan: (LogicalPlan, Boolean) => Boolean, cascade: Boolean, blocking: Boolean): Unit = { - val shouldRemove: LogicalPlan => Boolean = - if (cascade) { - _.exists(isMatchedPlan) + + val shouldRemove: LogicalPlan => Boolean = if (cascade) { + _.exists(isMatchedPlan(_, false)) } else { - isMatchedPlan + isMatchedPlan(_, false) } val plansToUncache = cachedData.filter(cd => shouldRemove(cd.plan)) this.synchronized { @@ -228,7 +240,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val cacheAlreadyLoaded = cd.cachedRepresentation. fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder. 
isCachedColumnBuffersLoaded - cd.plan.exists(isMatchedPlan) && !cacheAlreadyLoaded + !cacheAlreadyLoaded && cd.plan.exists(isMatchedPlan(_, true)) }) } } @@ -305,17 +317,19 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val fullMatch = cachedData.find(cd => plan.sameResult(cd.plan)) fullMatch.map(Option(_)).getOrElse( plan match { - case p: Project => lookUpPartiallyMatchedCachedPlan(p) + case p: Project => lookUpPartiallyMatchedCachedPlan(p, cachedData) case _ => None }) } - private def lookUpPartiallyMatchedCachedPlan(incomingProject: Project): Option[CachedData] = { + private def lookUpPartiallyMatchedCachedPlan( + incomingProject: Project, + cachedPlansToUse: IndexedSeq[CachedData]): Option[CachedData] = { var foundMatch = false var partialMatch: Option[CachedData] = None val (incmngchild, incomingFilterChain) = CompatibilityChecker.extractChildIgnoringFiltersFromIncomingProject(incomingProject) - for (cd <- cachedData if !foundMatch) { + for (cd <- cachedPlansToUse if !foundMatch) { (incmngchild, incomingFilterChain, cd.plan) match { case CompatibilityChecker(residualIncomingFilterChain, cdPlanProject) => // since the child of both incoming and cached plan are same @@ -397,14 +411,21 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, directlyMappedincomingToCachedPlanIndx, cdPlanProject, cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) - val root = cd.cachedRepresentation.toOption.get.withOutput( - projectionToForceOnCdPlan) - val newPartialPlan = if (transformedIntermediateFilters.isEmpty) { - Project(modifiedInProj, root) + // If InMemoryRelation (right is defined) it is the case of lookup or cache query + // Else it is a case of dummy CachedData partial lookup for finding out if the + // plan being checked uses the uncached plan + val newPartialPlan = if (cd.cachedRepresentation.isRight) { + val root = cd.cachedRepresentation.toOption.get.withOutput( + projectionToForceOnCdPlan) + if (transformedIntermediateFilters.isEmpty) { + Project(modifiedInProj, root) + } else { + val chainedFilter = CompatibilityChecker.combineFilterChainUsingRoot( + transformedIntermediateFilters, root) + Project(modifiedInProj, chainedFilter) + } } else { - val chainedFilter = CompatibilityChecker.combineFilterChainUsingRoot( - transformedIntermediateFilters, root) - Project(modifiedInProj, chainedFilter) + cd.cachedRepresentation.left.toOption.get } partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) foundMatch = true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala index f24f0003981e..1d488e9c48fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import org.apache.spark.sql.execution.analysis.EarlyCollapseProject -import org.apache.spark.sql.execution.columnar.InMemoryRelation +import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.internal.SQLConf class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite { @@ -61,4 +61,29 @@ class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite { } checkAnswer(fullyUnopt, rows) } 
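  // Plan shape asserted by the test above (a sketch inferred from its assertions,
  // not spelled out by the patch itself):
  //
  //   Project / Filter             <- df2's residual operators
  //     +- InMemoryRelation        <- df1's cache
  //          +- Project / Filter   <- df1's residual operators
  //               +- InMemoryRelation   <- cache of the base DataFrame
  //
  // which is why collectLeaves() is walked twice: the optimized plan's leaf is df1's
  // InMemoryRelation, and the leaf of that relation's own plan is the base cache.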
+ + test("check cached plan invalidation when subplan is uncached") { + val baseDf = spark.range(1000).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4) + val df1 = baseDf.withColumn("d", $"a" + 1 + $"b") + baseDf.cache() + // Add df1 to the CacheManager; the buffer is currently empty. + df1.cache() + assertCacheDependency(df1, 1) + // removal of InMemoryRelation of base Df should result in the removal of dependency of df1 + baseDf.unpersist(blocking = true) + assertCacheDependency(df1.limit(1000), 0) + } + + + private def assertCacheDependency(df: DataFrame, numOfCachesDependedUpon: Int = 1): Unit = { + + val cachedPlan = df.queryExecution.withCachedData.collectFirst { + case i: InMemoryRelation => i.cacheBuilder.cachedPlan + } + assert(cachedPlan.isDefined) + + assert(find(cachedPlan.get)(_.isInstanceOf[InMemoryTableScanExec]).size + == numOfCachesDependedUpon) + } } From 8bfb25be42f523b3d3ecf90416962a7871578805 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 4 Apr 2024 05:57:18 -0700 Subject: [PATCH 127/129] SPARK-45959. added tests for verifying plans uncaching is happening correctly. Fixed the PR related to the uncaching issues found in testing --- .../spark/sql/EarlyCollapseProjectSuite.scala | 173 ++++++++++++------ ...EarlyCollapseProjectWithCachingSuite.scala | 23 +-- 2 files changed, 128 insertions(+), 68 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 4a9e9aef1162..39f505348c14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -18,9 +18,10 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.analysis.EarlyCollapseProject -import org.apache.spark.sql.execution.columnar.InMemoryRelation +import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.functions.{col, lit} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -30,10 +31,12 @@ class EarlyCollapseProjectSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ val useCaching: Boolean = false + test("withColumns: check no new project addition for simple columns addition") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2))) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2)), + (1, 2), (1, 1)) } test("withColumns: check no new project addition if redefined alias is not used in" + @@ -41,44 +44,48 @@ class EarlyCollapseProjectSuite extends QueryTest val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "a", $"b") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2))) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2)), (1, 2), (1, 1)) } test("withColumns: no new project addition if redefined alias is used in new columns - 1") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "a", $"b") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2))) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2)), (1, 2), (1, 1)) } test("withColumns: no new project addition if redefined alias is used in new columns - 2") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), (1, 2), (1, 1)) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is not used in other cols") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("a", "c")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumnRenamed("a", "c"), (1, 1), (0, 0)) } test("withColumnRenamed: remap of column should not result in new project if the source" + " of remap is an attribute used in other cols") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("a", "d")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumnRenamed("a", "d"), (1, 1), (0, 0)) } + test("withColumnRenamed: remap of column should not result in new project if the remap" + " is on an alias") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("d", "x")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnRenamed("d", "x"), + (1, 1), (0, 0)) } test("withColumnRenamed: remap of column should not result in new project if the remap" + @@ -86,130 +93,148 @@ class EarlyCollapseProjectSuite extends QueryTest val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). select($"c", $"a", $"b", $"d", $"d" as "k") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnRenamed("d", "x")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnRenamed("d", "x"), + (1, 1), (0, 0)) } + test("withColumnRenamed: test multi column remap") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). 
select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u"))) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u")), (1, 1), (0, 0)) } test("withColumns: test multi column addition") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), - Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))), (1, 2), (1, 1)) } test("mix of column addition, rename and dropping") { - val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.select($"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", - $"a" as "renameCola", $"c" * $"d" as "c", $"a")) + $"a" as "renameCola", $"c" * $"d" as "c", $"a"), (1, 2), (1, 1)) } + test("mix of column addition, rename and dropping - 1") { - val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.select($"c" * $"d" as "c", $"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", - $"a" as "renameCola", $"a")) + $"a" as "renameCola", $"a"), (1, 2), (1, 1)) } test("mix of column addition, rename and dropping - 2") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA"), + (1, 2), (1, 1)) } + test("mix of column addition, rename and dropping - 3") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.select($"d" * $"a" as "d", $"b" as "renameB", $"a" * $"d" as "renameA", - $"a" as "renameColA")) + $"a" as "renameColA"), (1, 2), (1, 1)) } + test("mix of column addition, rename and dropping - 4") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). 
select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"c")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.select($"c"), + (1, 2), (0, 1)) } + test("mix of column addition, rename and dropping - 5") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d" * 7 as "a")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.select($"d" * 7 as "a"), + (1, 2), (0, 1)) } + test("mix of column addition, rename and dropping - 6") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"d" * 7 as "a", $"d" * 7 as "b", - $"b" + $"a" as "e")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select($"d" * 7 as "a", $"d" * 7 as "b", $"b" + $"a" as "e"), (1, 2), (0, 1)) } test("mix of column addition, rename and dropping - 7") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). select($"a" + $"b" as "c", $"a", $"b").select( lit(9) as "e", $"c", lit(11) as "a", $"b", $"c" * $"a" * $"b" as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.select($"a" as "a1", lit(7) as "d1", - $"b" as "b1", $"c" * $"a" as "c", lit(13) as "f")) + checkProjectCollapseCacheUseAndInvalidation( + baseDfCreator, + df => df.select($"a" as "a1", lit(7) as "d1", $"b" as "b1", $"c" * $"a" as "c", + lit(13) as "f"), (1, 2), (0, 1)) } test("new columns added do not result in new project -1") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumns( + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumns( Seq("newCol1", "newCol2", "newCol3", "newCol4"), - Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d")))) + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))), + (1, 2), (1, 1)) } test("new columns added do not result in new project -2") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e")))) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), + (1, 2), (1, 1)) } test("new columns added do not result in new project, with positions changed") { val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b"). 
select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") - checkProjectCollapseAndCacheUse(baseDfCreator, - df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b"), + (1, 2), (1, 1)) } + test("renamed columns do not result in new project") { val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed( - Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1"))) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnsRenamed( + Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1")), (1, 1), (0, 0)) } test("early collapse of filter chain with project - 1") { val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b") - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"a" > 4). + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.filter($"a" > 4). filter($"c" * $"b" < 60). - select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")) + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"), + (1, 2), (0, 1)) } test("early collapse of filter chain with project - 2") { val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4).filter($"c" * $"b" < 60) - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"b" < 100). - select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.filter($"b" < 100). 
+ select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"), + (1, 2), (0, 1)) } test("resurrection of intermediate dropped cols when used in filter") { @@ -218,8 +243,8 @@ class EarlyCollapseProjectSuite extends QueryTest // A dropped column would result in a new project being added on top of filter // so we have to take into account of that extra project added while checking // assertion of init node size and optimized df nodes size - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed( - Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).filter($"a" > 5)) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnsRenamed( + Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).filter($"a" > 5), (1, 2), (0, 1)) } test("resurrection of right renamed intermediate dropped cols when used in filter") { @@ -228,14 +253,19 @@ class EarlyCollapseProjectSuite extends QueryTest // A dropped column would result in a new project being added on top of filter // so we have to take into account of that extra project added while checking // assertion of init node size and optimized df nodes size - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.withColumnsRenamed( - Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).select($"c1", $"d1").filter($"a" > 25)) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnsRenamed( + Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).select($"c1", $"d1").filter($"a" > 25), + (1, 2), (0, 1)) } - protected def checkProjectCollapseAndCacheUse( + protected def checkProjectCollapseCacheUseAndInvalidation( baseDfCreator: () => DataFrame, - testExec: DataFrame => DataFrame): Unit = { + testExec: DataFrame => DataFrame, + baseAndDerivedIMRsOnCache: (Int, Int), + baseAndDerivedIMRsOnCBaseInvalidation: (Int, Int)): Unit = { val baseDf = baseDfCreator() + val baseDfRows = baseDf.collect() + val testDfRows = testExec(baseDf).collect() if (useCaching) { baseDf.cache() } @@ -260,6 +290,30 @@ class EarlyCollapseProjectSuite extends QueryTest } assert(collectNodes(fullyUnopt).size >= nonOptDfNodes.size) checkAnswer(newDfOpt, fullyUnopt) + + if (useCaching) { + // first unpersist both dataframes + baseDf.unpersist(true) + newDfOpt.unpersist(true) + baseDf.cache() + newDfOpt.cache() + assertCacheDependency(baseDfCreator(), baseAndDerivedIMRsOnCache._1) + assertCacheDependency(testExec(baseDfCreator()), baseAndDerivedIMRsOnCache._2) + checkAnswer(baseDfCreator(), baseDfRows) + checkAnswer(testExec(baseDfCreator()), testDfRows) + baseDf.unpersist(true) + newDfOpt.unpersist(true) + baseDfCreator().cache() + testExec(baseDfCreator()).cache() + baseDfCreator().unpersist(true) + assertCacheDependency(baseDfCreator(), baseAndDerivedIMRsOnCBaseInvalidation._1) + assertCacheDependency(testExec(baseDfCreator()), baseAndDerivedIMRsOnCBaseInvalidation._2) + checkAnswer(baseDfCreator(), baseDfRows) + checkAnswer(testExec(baseDfCreator()), testDfRows) + // recache base df so that if existing tests want to continue should work fine + newDfOpt.unpersist(true) + baseDfCreator().cache() + } } private def getComparableDataFrames( @@ -279,5 +333,22 @@ class EarlyCollapseProjectSuite extends QueryTest private def collectNodes(df: DataFrame): Seq[LogicalPlan] = df.logicalPlan.collect { case l => l } + + def assertCacheDependency(df: DataFrame, numOfCachesExpected: Int): Unit = { + + val cachedPlans = df.queryExecution.withCachedData.collect { + case i: InMemoryRelation => i.cacheBuilder.cachedPlan + } + val totalIMRs = 
cachedPlans.size + cachedPlans.map(ime => recurse(ime)).sum + assert(totalIMRs == numOfCachesExpected) + } + + private def recurse(sparkPlan: SparkPlan): Int = { + val imrs = sparkPlan.collect { + case i: InMemoryTableScanExec => i + } + imrs.size + imrs.map(ime => recurse(ime.relation.cacheBuilder.cachedPlan)).sum + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala index 1d488e9c48fd..20165eb121f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import org.apache.spark.sql.execution.analysis.EarlyCollapseProject -import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} +import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.internal.SQLConf class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite { @@ -29,8 +29,9 @@ class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite { val baseDfCreator = () => spark.range(1000).select($"id" as "a", $"id" as "b"). select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4).filter($"c" * $"b" < 60) - checkProjectCollapseAndCacheUse(baseDfCreator, df => df.filter($"b" < 100). - select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e")) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"), + (1, 2), (0, 1)) // there is already a cached base Df val df1 = baseDfCreator().filter($"b" < 100). @@ -69,21 +70,9 @@ class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite { baseDf.cache() // Add df1 to the CacheManager; the buffer is currently empty. df1.cache() - assertCacheDependency(df1, 1) + assertCacheDependency(df1, 2) // removal of InMemoryRelation of base Df should result in the removal of dependency of df1 baseDf.unpersist(blocking = true) - assertCacheDependency(df1.limit(1000), 0) - } - - - private def assertCacheDependency(df: DataFrame, numOfCachesDependedUpon: Int = 1): Unit = { - - val cachedPlan = df.queryExecution.withCachedData.collectFirst { - case i: InMemoryRelation => i.cacheBuilder.cachedPlan - } - assert(cachedPlan.isDefined) - - assert(find(cachedPlan.get)(_.isInstanceOf[InMemoryTableScanExec]).size - == numOfCachesDependedUpon) + assertCacheDependency(df1.limit(1000), 1) } } From 59ff5db7a9aaf07006b9ba915e81925d576153d1 Mon Sep 17 00:00:00 2001 From: ashahid Date: Thu, 4 Apr 2024 10:45:24 -0700 Subject: [PATCH 128/129] SPARK-45959. 
added more assertions and enhanced tests to do more of cache invalidation among independent and dependent dataframes --- .../spark/sql/EarlyCollapseProjectSuite.scala | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala index 39f505348c14..036a102ba77d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.functions.{col, lit} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession - class EarlyCollapseProjectSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ @@ -262,12 +261,31 @@ class EarlyCollapseProjectSuite extends QueryTest baseDfCreator: () => DataFrame, testExec: DataFrame => DataFrame, baseAndDerivedIMRsOnCache: (Int, Int), - baseAndDerivedIMRsOnCBaseInvalidation: (Int, Int)): Unit = { + baseAndDerivedIMRsOnBaseInvalidation: (Int, Int)): Unit = { + // now check if the results of optimized dataframe and completely unoptimized dataframe are + // same + val fullyUnoptBase = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + baseDfCreator() + } + + val fullyUnoptTest = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + testExec(baseDfCreator()) + } + + val baseDfRows = fullyUnoptBase.collect() + val testDfRows = fullyUnoptTest.collect() + val baseDf = baseDfCreator() - val baseDfRows = baseDf.collect() - val testDfRows = testExec(baseDf).collect() if (useCaching) { baseDf.cache() + assertCacheDependency(baseDfCreator(), 1) + assertCacheDependency(testExec(baseDfCreator()), 1) + baseDfCreator().unpersist(true) + assertCacheDependency(baseDfCreator(), 0) + assertCacheDependency(testExec(baseDfCreator()), 0) + baseDfCreator().cache() } val initNodes = collectNodes(baseDf) val (newDfOpt, newDfUnopt) = getComparableDataFrames(baseDf, testExec) @@ -283,13 +301,9 @@ class EarlyCollapseProjectSuite extends QueryTest assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. 
isInstanceOf[InMemoryRelation]) } - // now check if the results of optimized dataframe and completely unoptimized dataframe are same - val fullyUnopt = withSQLConf( - SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { - testExec(baseDfCreator()) - } - assert(collectNodes(fullyUnopt).size >= nonOptDfNodes.size) - checkAnswer(newDfOpt, fullyUnopt) + + assert(collectNodes(fullyUnoptTest).size >= nonOptDfNodes.size) + checkAnswer(newDfOpt, fullyUnoptTest) if (useCaching) { // first unpersist both dataframes @@ -306,8 +320,8 @@ class EarlyCollapseProjectSuite extends QueryTest baseDfCreator().cache() testExec(baseDfCreator()).cache() baseDfCreator().unpersist(true) - assertCacheDependency(baseDfCreator(), baseAndDerivedIMRsOnCBaseInvalidation._1) - assertCacheDependency(testExec(baseDfCreator()), baseAndDerivedIMRsOnCBaseInvalidation._2) + assertCacheDependency(baseDfCreator(), baseAndDerivedIMRsOnBaseInvalidation._1) + assertCacheDependency(testExec(baseDfCreator()), baseAndDerivedIMRsOnBaseInvalidation._2) checkAnswer(baseDfCreator(), baseDfRows) checkAnswer(testExec(baseDfCreator()), testDfRows) // recache base df so that if existing tests want to continue should work fine @@ -335,7 +349,6 @@ class EarlyCollapseProjectSuite extends QueryTest } def assertCacheDependency(df: DataFrame, numOfCachesExpected: Int): Unit = { - val cachedPlans = df.queryExecution.withCachedData.collect { case i: InMemoryRelation => i.cacheBuilder.cachedPlan } @@ -349,6 +362,5 @@ class EarlyCollapseProjectSuite extends QueryTest } imrs.size + imrs.map(ime => recurse(ime.relation.cacheBuilder.cachedPlan)).sum } - } From 30accd1401449c7a0576311420afb899b1bbe4ad Mon Sep 17 00:00:00 2001 From: ashahid Date: Mon, 3 Jun 2024 18:32:24 -0700 Subject: [PATCH 129/129] SPARK-45959. fix compilation issue in merge --- .../main/scala/org/apache/spark/sql/execution/CacheManager.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index b0c35266ac5a..e74c15b075c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Filter, IgnoreCachedData, Le import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.analysis.EarlyCollapseProject import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation}
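// Worked example (illustrative) of CompatibilityChecker.combineFilterChainUsingRoot
// introduced in the patches above: for filters collected top-down as Seq(f1, f2, f3)
// and a new root plan,
//
//   combineFilterChainUsingRoot(Seq(f1, f2, f3), root)
//     == f1.copy(child = f2.copy(child = f3.copy(child = root)))
//
// i.e. the bottom-most collected filter f3 is re-parented directly onto `root`, and the
// foldRight rebuilds the chain upward, so the original predicate evaluation order is
// preserved over the substituted root.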