diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain index a30cd136e8db..0487d7360201 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/crosstab.explain @@ -1,5 +1,4 @@ Project [a_b#0] -+- Project [a_b#0] - +- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] - +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] - +- LocalRelation , [id#0L, a#0, b#0] ++- Aggregate [a_b#0], [a_b#0, pivotfirst(__pivot_col#0, count(1) AS count#0L, 0, 0) AS __pivot_count(1) AS count AS `count(1) AS count`#0] + +- Aggregate [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END], [CASE WHEN isnull(a#0) THEN null ELSE cast(a#0 as string) END AS a_b#0, CASE WHEN isnull(b#0) THEN null ELSE regexp_replace(cast(b#0 as string), `, , 1) END AS __pivot_col#0, count(1) AS count(1) AS count#0L] + +- LocalRelation , [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain index b203f715c71a..2e20694bd784 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/describe.explain @@ -1,6 +1,5 @@ Project [summary#0, element_at(id#0, summary#0, None, false) AS id#0, element_at(b#0, summary#0, None, false) AS b#0] -+- Project [id#0, b#0, summary#0] - +- Generate explode([count,mean,stddev,min,max]), false, [summary#0] - +- Aggregate [map(cast(count as string), cast(count(id#0L) as string), cast(mean as string), cast(avg(id#0L) as string), cast(stddev as string), cast(stddev(cast(id#0L as double)) as string), cast(min as string), cast(min(id#0L) as string), cast(max as string), cast(max(id#0L) as string)) AS id#0, map(cast(count as string), cast(count(b#0) as string), cast(mean as string), cast(avg(b#0) as string), cast(stddev as string), cast(stddev(b#0) as string), cast(min as string), cast(min(b#0) as string), cast(max as string), cast(max(b#0) as string)) AS b#0] - +- Project [id#0L, b#0] - +- LocalRelation , [id#0L, a#0, b#0] ++- Generate explode([count,mean,stddev,min,max]), false, [summary#0] + +- Aggregate [map(cast(count as string), cast(count(id#0L) as string), cast(mean as string), cast(avg(id#0L) as string), cast(stddev as string), cast(stddev(cast(id#0L as double)) as string), cast(min as string), cast(min(id#0L) as string), cast(max as string), cast(max(id#0L) as string)) AS id#0, map(cast(count as string), cast(count(b#0) as string), cast(mean as string), cast(avg(b#0) as string), cast(stddev as string), cast(stddev(b#0) as string), cast(min as string), cast(min(b#0) as string), cast(max as string), cast(max(b#0) as string)) AS b#0] + +- Project [id#0L, b#0] + +- LocalRelation , [id#0L, 
a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain index 3ce8a26f1383..f75ef6c0885f 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/summary.explain @@ -1,5 +1,4 @@ Project [summary#0, element_at(id#0, summary#0, None, false) AS id#0, element_at(a#0, summary#0, None, false) AS a#0, element_at(b#0, summary#0, None, false) AS b#0] -+- Project [id#0, a#0, b#0, summary#0] - +- Generate explode([mean,min]), false, [summary#0] - +- Aggregate [map(cast(mean as string), cast(avg(id#0L) as string), cast(min as string), cast(min(id#0L) as string)) AS id#0, map(cast(mean as string), cast(avg(a#0) as string), cast(min as string), cast(min(a#0) as string)) AS a#0, map(cast(mean as string), cast(avg(b#0) as string), cast(min as string), cast(min(b#0) as string)) AS b#0] - +- LocalRelation , [id#0L, a#0, b#0] ++- Generate explode([mean,min]), false, [summary#0] + +- Aggregate [map(cast(mean as string), cast(avg(id#0L) as string), cast(min as string), cast(min(id#0L) as string)) AS id#0, map(cast(mean as string), cast(avg(a#0) as string), cast(min as string), cast(min(a#0) as string)) AS a#0, map(cast(mean as string), cast(avg(b#0) as string), cast(min as string), cast(min(b#0) as string)) AS b#0] + +- LocalRelation , [id#0L, a#0, b#0] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a233161713c3..16731aee2201 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -59,6 +59,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType.DAY import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.ArrayImplicits._ +import org.apache.spark.util.Utils /** * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and @@ -212,7 +213,12 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor AnalysisHelper.markInAnalyzer { val analyzed = executeAndTrack(plan, tracker) checkAnalysis(analyzed) - analyzed + val excludedPostAnalysisRulesConf = + conf.postAnalysisExcludesRules.toSeq.flatMap(Utils.stringToSeq) + postAnalysisEarlyOptimizationRules.filterNot( + rule => excludedPostAnalysisRulesConf.contains(rule.ruleName)).foldLeft(analyzed) { + case(rs, rule) => rule(rs) + } } } @@ -248,6 +254,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor */ val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil + val postAnalysisEarlyOptimizationRules: Seq[Rule[LogicalPlan]] = Nil + private def typeCoercionRules(): List[Rule[LogicalPlan]] = if (conf.ansiEnabled) { AnsiTypeCoercion.typeCoercionRules } else { @@ -356,6 +364,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor Batch("HandleSpecialCommand", Once, HandleSpecialCommand), Batch("Remove watermark for batch query", Once, + EliminateEventTimeWatermark), + Batch("Remove watermark for batch query", Once, EliminateEventTimeWatermark) ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index c10e000a098c..196f93db91d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -59,24 +59,39 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { val (newExprs, newChild) = { // Resolving expressions against current plan. val maybeResolvedExprs = exprs.map(resolveExpressionByPlanOutput(_, u)) + // Recursively resolving expressions on the child of current plan. resolveExprsAndAddMissingAttrs(maybeResolvedExprs, u.child) } // If some attributes used by expressions are resolvable only on the rewritten child // plan, we need to add them into original projection. - lazy val missingAttrs = - (AttributeSet(newExprs) -- u.outputSet).intersect(newChild.outputSet) + val (missingAttrsFromOutput, missingAttrsFromDroppedAttr) = { + val missing1 = AttributeSet(newExprs) -- u.outputSet + val fulfilledFromOutput = missing1.intersect(newChild.outputSet) + val missing2 = missing1 -- fulfilledFromOutput + val fulfilledFromDroppedCol = missing2.intersect(u.getTagValue( + LogicalPlan.DROPPED_NAMED_EXPRESSIONS). + map(sq => AttributeSet(sq.map(_.toAttribute))).getOrElse(AttributeSet.empty)) + fulfilledFromOutput -> fulfilledFromDroppedCol + } u match { case p: Project => - val newProject = Project(p.projectList ++ missingAttrs, newChild) + val droppedNamedExprs = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). + getOrElse(Seq.empty) + val newProject = Project(p.projectList ++ missingAttrsFromOutput ++ + missingAttrsFromDroppedAttr.map(attr => + droppedNamedExprs.find(_.toAttribute.canonicalized == attr.canonicalized).get), + newChild) newProject.copyTagsFrom(p) (newExprs, newProject) - case a @ Aggregate(groupExprs, aggExprs, child) => - if (missingAttrs.forall(attr => groupExprs.exists(_.semanticEquals(attr)))) { + case a @ Aggregate(groupExprs, aggExprs, _) => + if (missingAttrsFromOutput.forall(attr => + groupExprs.exists(_.semanticEquals(attr)))) { // All the missing attributes are grouping expressions, valid case. (newExprs, - a.copy(aggregateExpressions = aggExprs ++ missingAttrs, child = newChild)) + a.copy(aggregateExpressions = aggExprs ++ missingAttrsFromOutput, + child = newChild)) } else { // Need to add non-grouping attributes, invalid case. 
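A minimal sketch of the resolution scenario the DROPPED_NAMED_EXPRESSIONS handling above appears to target, assuming a SparkSession named spark with spark.implicits._ in scope (the query is illustrative, not part of this patch):

import spark.implicits._

// Once EarlyCollapseProject collapses the two selects into a single Project,
// column "b" is no longer part of that Project's output; the
// DROPPED_NAMED_EXPRESSIONS tag is what lets the sort key still resolve, after
// which resolveExprsAndAddMissingAttrs adds it back as a missing attribute.
val df = spark.range(5).selectExpr("id AS a", "id + 1 AS b")
df.select($"a").sort($"b").show()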
            (exprs, a)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index a2ede8ac735c..83c5c00003df 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -136,6 +136,9 @@ abstract class LogicalPlan
 
   private[this] lazy val outputAttributes = AttributeSeq.fromNormalOutput(output)
 
+  private[this] lazy val droppedAttributes = this.getTagValue(
+    LogicalPlan.DROPPED_NAMED_EXPRESSIONS).map(_.map(_.toAttribute)).getOrElse(Seq.empty)
+
   private[this] lazy val outputMetadataAttributes = AttributeSeq(metadataOutput)
 
   /**
@@ -158,7 +161,8 @@ abstract class LogicalPlan
       nameParts: Seq[String],
       resolver: Resolver): Option[NamedExpression] =
     outputAttributes.resolve(nameParts, resolver)
-      .orElse(outputMetadataAttributes.resolve(nameParts, resolver))
+      .orElse(outputMetadataAttributes.resolve(nameParts, resolver)).orElse(
+        droppedAttributes.resolve(nameParts, resolver))
 
   /**
    * Given an attribute name, split it to name parts by dot, but
@@ -202,6 +206,8 @@ object LogicalPlan {
   // to the old code path.
   private[spark] val PLAN_ID_TAG = TreeNodeTag[Long]("plan_id")
   private[spark] val IS_METADATA_COL = TreeNodeTag[Unit]("is_metadata_col")
+  private[spark] val DROPPED_NAMED_EXPRESSIONS =
+    TreeNodeTag[Seq[NamedExpression]]("dropped_namedexprs")
 }
 
 /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 88c2228e640c..1a4cfc2db694 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -238,6 +238,14 @@ object SQLConf {
     }
   }
 
+  val EXCLUDE_POST_ANALYSIS_RULES =
+    buildConf("spark.sql.analyzer.excludePostAnalysisRules")
+      .internal()
+      .doc("Comma-separated list of the names of post-analysis rules to be excluded.")
+      .version("3.5.0")
+      .stringConf
+      .createOptional
+
   val ANALYZER_MAX_ITERATIONS = buildConf("spark.sql.analyzer.maxIterations")
     .internal()
     .doc("The max number of iterations the analyzer runs.")
@@ -5159,6 +5167,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def analyzerMaxIterations: Int = getConf(ANALYZER_MAX_ITERATIONS)
 
+  def postAnalysisExcludesRules: Option[String] = getConf(EXCLUDE_POST_ANALYSIS_RULES)
+
   def optimizerExcludedRules: Option[String] = getConf(OPTIMIZER_EXCLUDED_RULES)
 
   def optimizerMaxIterations: Int = getConf(OPTIMIZER_MAX_ITERATIONS)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index c7511737b2b3..a9df4d9c4dee 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -3890,7 +3890,8 @@ class Dataset[T] private[sql](
    */
   def storageLevel: StorageLevel = {
     sparkSession.sharedState.cacheManager.lookupCachedData(this).map { cachedData =>
-      cachedData.cachedRepresentation.cacheBuilder.storageLevel
+      cachedData.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity).
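A hedged usage example for the EXCLUDE_POST_ANALYSIS_RULES escape hatch defined above; the rule name assumes Rule.ruleName reports the rule object's fully-qualified class name:

// Disable the post-analysis early collapse for the current session, e.g. while
// comparing analyzed plans. The value is a comma-separated list of rule names.
spark.conf.set(
  "spark.sql.analyzer.excludePostAnalysisRules",
  "org.apache.spark.sql.execution.analysis.EarlyCollapseProject")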
+ cacheBuilder.storageLevel }.getOrElse(StorageLevel.NONE) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index b96f257e6b5b..e74c15b075c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -17,18 +17,21 @@ package org.apache.spark.sql.execution +import scala.collection.mutable + import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.{LogEntry, Logging, MDC} import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint, View} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, IgnoreCachedData, LeafNode, LogicalPlan, Project, ResolvedHint, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.analysis.EarlyCollapseProject import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} @@ -37,19 +40,22 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK + /** Holds a cached logical plan and its data */ + case class CachedData( - // A normalized resolved plan (See QueryExecution#normalized). plan: LogicalPlan, - cachedRepresentation: InMemoryRelation) { + cachedRepresentation: Either[LogicalPlan, InMemoryRelation]) { + override def toString: String = s""" |CachedData( |logicalPlan=$plan - |InMemoryRelation=$cachedRepresentation) + |InMemoryRelation=${cachedRepresentation.merge}) |""".stripMargin } + /** * Provides support in a SQLContext for caching query results and automatically using these cached * results when subsequent queries are executed. Data is cached using byte buffers stored in an @@ -72,7 +78,8 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { /** Clears all cached tables. */ def clearCache(): Unit = this.synchronized { - cachedData.foreach(_.cachedRepresentation.cacheBuilder.clearCache()) + cachedData.foreach(_.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity) + .cacheBuilder.clearCache()) cachedData = IndexedSeq[CachedData]() CacheManager.logCacheOperation(log"Cleared all Dataframe cache entries") } @@ -118,7 +125,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { storageLevel: StorageLevel): Unit = { if (storageLevel == StorageLevel.NONE) { // Do nothing for StorageLevel.NONE since it will not actually cache any data. 
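A short sketch of how the reworked CachedData is read, based on the Either introduced above: Right holds a materialized InMemoryRelation, while Left holds a logical plan derived from another cache entry during partial matching. The helper name below is hypothetical; the fold itself is the idiom used throughout this patch:

// Either#merge widens both arms to LogicalPlan; the fold reaches the underlying
// InMemoryRelation (for Left, via the leaf collected by inMemoryRelationExtractor).
def underlyingRelation(cd: CachedData): InMemoryRelation =
  cd.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity)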
- } else if (lookupCachedDataInternal(planToCache).nonEmpty) { + } else if (lookupCachedDataInternal(planToCache).exists(_.cachedRepresentation.isRight)) { logWarning("Asked to cache already cached data.") } else { val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark) @@ -131,10 +138,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } this.synchronized { - if (lookupCachedDataInternal(planToCache).nonEmpty) { + if (lookupCachedDataInternal(planToCache).exists(_.cachedRepresentation.isRight)) { logWarning("Data has already been cached.") } else { - val cd = CachedData(planToCache, inMemoryRelation) + val cd = CachedData(planToCache, Right(inMemoryRelation)) cachedData = cd +: cachedData CacheManager.logCacheOperation(log"Added Dataframe cache entry:" + log"${MDC(DATAFRAME_CACHE_ENTRY, cd)}") @@ -191,21 +198,46 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { uncacheQuery(spark, plan, cascade, blocking = false) } - // The `plan` should have been normalized. - private def uncacheQueryInternal( + /** + * Un-cache the given plan or all the cache entries that refer to the given plan. + * + * @param spark The Spark session. + * @param plan The plan to be un-cached. + * @param cascade If true, un-cache all the cache entries that refer to the given + * plan; otherwise un-cache the given plan only. + * @param blocking Whether to block until all blocks are deleted. + */ + def uncacheQueryInternal( spark: SparkSession, plan: LogicalPlan, cascade: Boolean, - blocking: Boolean): Unit = { - uncacheByCondition(spark, _.sameResult(plan), cascade, blocking) + blocking: Boolean = false): Unit = { + val dummyCd = CachedData(plan, Left(plan)) + uncacheByCondition(spark, + (planToCheck: LogicalPlan, partialMatchOk: Boolean) => { + dummyCd.plan.sameResult(planToCheck) || (partialMatchOk && + (planToCheck match { + case p: Project => lookUpPartiallyMatchedCachedPlan(p, IndexedSeq(dummyCd)).isDefined + case _ => false + })) + }, cascade, blocking) } + def uncacheTableOrView(spark: SparkSession, name: Seq[String], cascade: Boolean): Unit = { uncacheByCondition( - spark, isMatchedTableOrView(_, name, spark.sessionState.conf), cascade, blocking = false) + spark, + isMatchedTableOrView(_, _, name, spark.sessionState.conf), + cascade, + blocking = false) } - private def isMatchedTableOrView(plan: LogicalPlan, name: Seq[String], conf: SQLConf): Boolean = { + + private def isMatchedTableOrView( + plan: LogicalPlan, + partialMatch: Boolean, + name: Seq[String], + conf: SQLConf): Boolean = { def isSameName(nameInCache: Seq[String]): Boolean = { nameInCache.length == name.length && nameInCache.zip(name).forall(conf.resolver.tupled) } @@ -230,20 +262,22 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { private def uncacheByCondition( spark: SparkSession, - isMatchedPlan: LogicalPlan => Boolean, + isMatchedPlan: (LogicalPlan, Boolean) => Boolean, cascade: Boolean, blocking: Boolean): Unit = { - val shouldRemove: LogicalPlan => Boolean = - if (cascade) { - _.exists(isMatchedPlan) + + val shouldRemove: LogicalPlan => Boolean = if (cascade) { + _.exists(isMatchedPlan(_, false)) } else { - isMatchedPlan + isMatchedPlan(_, false) } val plansToUncache = cachedData.filter(cd => shouldRemove(cd.plan)) this.synchronized { cachedData = cachedData.filterNot(cd => plansToUncache.exists(_ eq cd)) } - plansToUncache.foreach { _.cachedRepresentation.cacheBuilder.clearCache(blocking) } + plansToUncache.foreach { _.cachedRepresentation. 
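An end-to-end scenario for the partial-match lookup and the partialMatchOk branch above (column names and expectations are mine, not part of the patch), assuming spark.implicits._ is in scope:

val base = spark.range(10).select($"id", ($"id" % 3).as("grp"))
base.cache()
// A plan that only renames or adds columns on top of the cached one is expected
// to be served from the same InMemoryRelation via lookUpPartiallyMatchedCachedPlan.
val derived = base.withColumnRenamed("grp", "bucket").withColumn("twice", $"id" * 2)
derived.explain()
// Un-caching the base plan is expected to also account for such derived,
// partially matched plans (see the partialMatchOk branch above).
base.unpersist()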
+ fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder.clearCache(blocking) } + CacheManager.logCacheOperation(log"Removed ${MDC(SIZE, plansToUncache.size)} Dataframe " + log"cache entries, with logical plans being " + log"\n[${MDC(QUERY_PLAN, plansToUncache.map(_.plan).mkString(",\n"))}]") @@ -263,8 +297,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { // 2) The buffer has been cleared, but `isCachedColumnBuffersLoaded` returns true, then we // will keep it as it is. It means the physical plan has been re-compiled already in the // other thread. - val cacheAlreadyLoaded = cd.cachedRepresentation.cacheBuilder.isCachedColumnBuffersLoaded - cd.plan.exists(isMatchedPlan) && !cacheAlreadyLoaded + val cacheAlreadyLoaded = cd.cachedRepresentation. + fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder. + isCachedColumnBuffersLoaded + !cacheAlreadyLoaded && cd.plan.exists(isMatchedPlan(_, true)) }) } } @@ -276,8 +312,9 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { column: Seq[Attribute]): Unit = { val relation = cachedData.cachedRepresentation val (rowCount, newColStats) = - CommandUtils.computeColumnStats(sparkSession, relation, column) - relation.updateStats(rowCount, newColStats) + CommandUtils.computeColumnStats(sparkSession, relation.merge, column) + relation.fold(CacheManager.inMemoryRelationExtractor, identity). + updateStats(rowCount, newColStats) } /** @@ -301,15 +338,17 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { cachedData = cachedData.filterNot(cd => needToRecache.exists(_ eq cd)) } needToRecache.foreach { cd => - cd.cachedRepresentation.cacheBuilder.clearCache() + cd.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity). + cacheBuilder.clearCache() val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark) val newCache = sessionWithConfigsOff.withActive { val qe = sessionWithConfigsOff.sessionState.executePlan(cd.plan) - InMemoryRelation(cd.cachedRepresentation.cacheBuilder, qe) + InMemoryRelation(cd.cachedRepresentation. + fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder, qe) } - val recomputedPlan = cd.copy(cachedRepresentation = newCache) + val recomputedPlan = cd.copy(cachedRepresentation = Right(newCache)) this.synchronized { - if (lookupCachedDataInternal(recomputedPlan.plan).nonEmpty) { + if (lookupCachedDataInternal(recomputedPlan.plan).exists(_.cachedRepresentation.isRight)) { logWarning("While recaching, data was already added to cache.") } else { cachedData = recomputedPlan +: cachedData @@ -327,6 +366,34 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { lookupCachedDataInternal(query.queryExecution.normalized) } + /* + Partial match cases: + InComingPlan (case of add cols) cached plan InComing Plan ( case of rename) + Project P2 Project P1 Project P2 + attr1 attr1 attr1 + attr2 attr2 Alias2'(x, attr2) + Alias3 Alias3 Alias3'(y, Alias3-childExpr) + Alias4 Alias4 Alias4'(z, Alias4-childExpr) + Alias5 (k, f(attr1, attr2, al3, al4) + Alias6 (p, f(attr1, attr2, al3, al4) + */ + + /** Optionally returns cached data for the given [[LogicalPlan]]. 
*/ + def lookupCachedDataInternal(plan: LogicalPlan): Option[CachedData] = { + val fullMatch = cachedData.find(cd => plan.sameResult(cd.plan)) + val result = fullMatch.map(Option(_)).getOrElse( + plan match { + case p: Project => lookUpPartiallyMatchedCachedPlan(p, cachedData) + case _ => None + }) + if (result.isDefined) { + CacheManager.logCacheOperation(log"Dataframe cache hit for input plan:" + + log"\n${MDC(QUERY_PLAN, plan)} matched with cache entry:" + + log"${MDC(DATAFRAME_CACHE_ENTRY, result.get)}") + } + result + } + /** * Optionally returns cached data for the given [[LogicalPlan]]. The given plan will be normalized * before being used further. @@ -336,29 +403,248 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { lookupCachedDataInternal(normalized) } - private def lookupCachedDataInternal(plan: LogicalPlan): Option[CachedData] = { - val result = cachedData.find(cd => plan.sameResult(cd.plan)) - if (result.isDefined) { - CacheManager.logCacheOperation(log"Dataframe cache hit for input plan:" + - log"\n${MDC(QUERY_PLAN, plan)} matched with cache entry:" + - log"${MDC(DATAFRAME_CACHE_ENTRY, result.get)}") + private def lookUpPartiallyMatchedCachedPlan( + incomingProject: Project, + cachedPlansToUse: IndexedSeq[CachedData]): Option[CachedData] = { + var foundMatch = false + var partialMatch: Option[CachedData] = None + val (incmngchild, incomingFilterChain) = + CompatibilityChecker.extractChildIgnoringFiltersFromIncomingProject(incomingProject) + for (cd <- cachedPlansToUse if !foundMatch) { + (incmngchild, incomingFilterChain, cd.plan) match { + case CompatibilityChecker(residualIncomingFilterChain, cdPlanProject) => + // since the child of both incoming and cached plan are same + // that is why we are here. for mapping and comparison purposes lets + // canonicalize the cachedPlan's project list in terms of the incoming plan's child + // so that we can map correctly. + val cdPlanToIncomngPlanChildOutputMapping = + cdPlanProject.child.output.zip(incmngchild.output).toMap + + val canonicalizedCdProjList = cdPlanProject.projectList.map(_.transformUp { + case attr: Attribute => cdPlanToIncomngPlanChildOutputMapping(attr) + }.asInstanceOf[NamedExpression]) + + // matchIndexInCdPlanProj remains -1 in the end, it indicates it is + // new cols created out of existing output attribs + val (directlyMappedincomingToCachedPlanIndx, inComingProjNoDirectMapping) = + getDirectAndIndirectMappingOfIncomingToCachedProjectAttribs( + incomingProject, canonicalizedCdProjList) + + // Now there is a possible case where a literal is present in IMR as attribute + // and the incoming project also has that literal somewhere in the alias. Though + // we do not need to read it but looks like the deserializer fails if we skip that + // literal in the projection enforced on IMR. so in effect even if we do not + // require an attribute it still needs to be present in the projection forced + // also its possible that some attribute from IMR can be used in subexpression + // of the incoming projection. 
so we have to handle that + val unusedAttribsOfCDPlanToGenIncomingAttr = + cdPlanProject.projectList.indices.filterNot(i => + directlyMappedincomingToCachedPlanIndx.exists(_._2 == i)).map(i => { + val cdAttrib = cdPlanProject.projectList(i) + i -> AttributeReference(cdAttrib.name, cdAttrib.dataType, + cdAttrib.nullable, cdAttrib.metadata)(qualifier = cdAttrib.qualifier) + }) + + // Because in case of rename multiple incmong named exprs ( attribute or aliases) + // will point to a common cdplan attrib, we need to ensure they do not create + // separate attribute in the the modifiedProject for incoming plan.. + // that is a single attribute ref is present in all mixes of rename and pass thru + // attributes. + // so we will use the first attribute ref in the incoming directly mapped project + // or if no attrib exists ( only case of rename) we will pick the child expr which + // is bound to be an attribute as the common ref. + val cdAttribToCommonAttribForIncmngNe = directlyMappedincomingToCachedPlanIndx.map { + case (inAttribIndex, cdAttribIndex) => + cdPlanProject.projectList(cdAttribIndex).toAttribute -> + incomingProject.projectList(inAttribIndex) + }.groupBy(_._1).map { + case (cdAttr, incomngSeq) => + val incmngCommonAttrib = incomngSeq.map(_._2).flatMap { + case attr: Attribute => Seq(attr) + case Alias(attr: Attribute, _) => Seq(attr) + case _ => Seq.empty + }.headOption.getOrElse( + AttributeReference(cdAttr.name, cdAttr.dataType, cdAttr.nullable)()) + cdAttr -> incmngCommonAttrib + } + + // If expressions of inComingProjNoDirectMapping can be expressed in terms of the + // incoming attribute refs or incoming alias exprs, which can be mapped directly + // to the CachedPlan's output, we are good. so lets transform such indirectly + // mappable named expressions in terms of mappable attributes of the incoming plan + val transformedIndirectlyMappableExpr = + transformIndirectlyMappedExpressionsToUseCachedPlanAttributes( + inComingProjNoDirectMapping, incomingProject, cdPlanProject, + directlyMappedincomingToCachedPlanIndx, cdAttribToCommonAttribForIncmngNe, + unusedAttribsOfCDPlanToGenIncomingAttr, canonicalizedCdProjList) + + val projectionToForceOnCdPlan = cdPlanProject.output.zipWithIndex.map { + case (cdAttr, i) => + cdAttribToCommonAttribForIncmngNe.getOrElse(cdAttr, + unusedAttribsOfCDPlanToGenIncomingAttr.find(_._1 == i).map(_._2).get) + } + val forcedAttribset = AttributeSet(projectionToForceOnCdPlan) + if (transformedIndirectlyMappableExpr.forall( + _._2.references.subsetOf(forcedAttribset))) { + val transformedIntermediateFilters = transformFilters(residualIncomingFilterChain, + projectionToForceOnCdPlan, canonicalizedCdProjList) + if (transformedIntermediateFilters.forall(_.references.subsetOf(forcedAttribset))) { + val modifiedInProj = replacementProjectListForIncomingProject(incomingProject, + directlyMappedincomingToCachedPlanIndx, cdPlanProject, + cdAttribToCommonAttribForIncmngNe, transformedIndirectlyMappableExpr) + // If InMemoryRelation (right is defined) it is the case of lookup or cache query + // Else it is a case of dummy CachedData partial lookup for finding out if the + // plan being checked uses the uncached plan + val newPartialPlan = if (cd.cachedRepresentation.isRight) { + val root = cd.cachedRepresentation.toOption.get.withOutput( + projectionToForceOnCdPlan) + if (transformedIntermediateFilters.isEmpty) { + Project(modifiedInProj, root) + } else { + val chainedFilter = CompatibilityChecker.combineFilterChainUsingRoot( + transformedIntermediateFilters, 
root) + Project(modifiedInProj, chainedFilter) + } + } else { + cd.cachedRepresentation.left.toOption.get + } + partialMatch = Option(cd.copy(cachedRepresentation = Left(newPartialPlan))) + foundMatch = true + } + } + case _ => + } } - result + partialMatch } - /** - * Replaces segments of the given logical plan with cached versions where possible. The input - * plan must be normalized. - */ - private[sql] def useCachedData(plan: LogicalPlan): LogicalPlan = { + private def transformFilters(skippedFilters: Seq[Filter], + projectionToForceOnCdPlan: Seq[Attribute], + canonicalizedCdProjList: Seq[NamedExpression]): Seq[Filter] = { + val canonicalizedCdProjAsExpr = canonicalizedCdProjList.map { + case Alias(child, _) => child + case x => x + } + skippedFilters.map(f => { + val transformedCondn = f.condition.transformDown { + case expr => val matchedIndex = canonicalizedCdProjAsExpr.indexWhere(_ == expr) + if (matchedIndex != -1) { + projectionToForceOnCdPlan(matchedIndex) + } else { + expr + } + } + f.copy(condition = transformedCondn) + }) + } + + private def replacementProjectListForIncomingProject( + incomingProject: Project, + directlyMappedincomingToCachedPlanIndx: Seq[(Int, Int)], + cdPlanProject: Project, + cdAttribToCommonAttribForIncmngNe: Map[Attribute, Attribute], + transformedIndirectlyMappableExpr: Map[Int, NamedExpression]): Seq[NamedExpression] = + { + incomingProject.projectList.zipWithIndex.map { + case (ne, indx) => + directlyMappedincomingToCachedPlanIndx.find(_._1 == indx).map { + case (_, cdIndex) => + ne match { + case attr: Attribute => attr + case al: Alias => + val cdAttr = cdPlanProject.projectList(cdIndex).toAttribute + al.copy(child = cdAttribToCommonAttribForIncmngNe(cdAttr))( + exprId = al.exprId, qualifier = al.qualifier, + explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys + ) + } + }.getOrElse({ + transformedIndirectlyMappableExpr(indx) + }) + } + } + + private def transformIndirectlyMappedExpressionsToUseCachedPlanAttributes( + inComingProjNoDirectMapping: Seq[(Int, Int)], + incomingProject: Project, + cdPlanProject: Project, + directlyMappedincomingToCachedPlanIndx: Seq[(Int, Int)], + cdAttribToCommonAttribForIncmngNe: Map[Attribute, Attribute], + unusedAttribsOfCDPlanToGenIncomingAttr: Seq[(Int, AttributeReference)], + canonicalizedCdProjList: Seq[NamedExpression]): Map[Int, NamedExpression] = + { + inComingProjNoDirectMapping.map { + case (incomngIndex, _) => + val indirectIncmnNe = incomingProject.projectList(incomngIndex) + val modifiedNe = indirectIncmnNe.transformDown { + case expr => directlyMappedincomingToCachedPlanIndx.find { + case (incomingIndex, _) => + val directMappedNe = incomingProject.projectList(incomingIndex) + directMappedNe.toAttribute == expr || + directMappedNe.children.headOption.contains(expr) + }.map { + case (_, cdIndex) => + val cdAttrib = cdPlanProject.projectList(cdIndex).toAttribute + cdAttribToCommonAttribForIncmngNe(cdAttrib) + }.orElse( + unusedAttribsOfCDPlanToGenIncomingAttr.find { + case (i, _) => val cdNe = canonicalizedCdProjList(i) + cdNe.children.headOption.contains(expr) + }.map(_._2)). 
+ map(ne => ne.toAttribute).getOrElse(expr) + }.asInstanceOf[NamedExpression] + + incomngIndex -> modifiedNe + }.toMap + } + + private def getDirectAndIndirectMappingOfIncomingToCachedProjectAttribs( + incomingProject: Project, + canonicalizedCdProjList: Seq[NamedExpression]): (Seq[(Int, Int)], Seq[(Int, Int)]) = + { + incomingProject.projectList.zipWithIndex.map { + case (inComingNE, index) => + // first check for equivalent named expressions..if index is != -1, that means + // it is pass thru Alias or pass thru - Attribute + var matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere(_ == inComingNE) + if (matchIndexInCdPlanProj == -1) { + // if match index is -1, that means it could be two possibilities: + // 1) it is a case of rename which means the incoming expr is an alias and + // its child is an attrib ref, which may have a direct attribref in the + // cdPlanProj, or it may actually have an alias whose ref matches the ref + // of incoming attribRef + // 2) the positions in the incoming project alias and the cdPlanProject are + // different. as a result the canonicalized alias of each would have + // relatively different exprIDs ( as their relative positions differ), but + // even in such cases as their child logical plans are same, so the child + // expression of each alias will have same canonicalized data + val incomingExprToCheck = inComingNE match { + case x: AttributeReference => x + case Alias(expr, _) => expr + } + matchIndexInCdPlanProj = canonicalizedCdProjList.indexWhere { + case Alias(expr, _) => expr == incomingExprToCheck + case x => x == incomingExprToCheck + } + } + index -> matchIndexInCdPlanProj + }.partition(_._2 != -1) + } + + /** Replaces segments of the given logical plan with cached versions where possible. */ + def useCachedData(plan: LogicalPlan): LogicalPlan = { val newPlan = plan transformDown { case command: IgnoreCachedData => command - case currentFragment => + case currentFragment if !currentFragment.isInstanceOf[InMemoryRelation] => lookupCachedDataInternal(currentFragment).map { cached => + // After cache lookup, we should still keep the hints from the input plan. val hints = EliminateResolvedHint.extractHintsFromPlan(currentFragment)._2 - val cachedPlan = cached.cachedRepresentation.withOutput(currentFragment.output) + val cachedPlan = cached.cachedRepresentation.map(_.withOutput(currentFragment.output)). + merge + // The returned hint list is in top-down order, we should create the hint nodes from // right to left. 
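A worked rename case for the direct/indirect mapping computed above (hypothetical columns; the sharing of a single attribute reference is my reading of cdAttribToCommonAttribForIncmngNe):

// cached plan projects   : [id, (id + 1) AS c]
// incoming plan projects : [c AS c1, c AS c2, (c * 2) AS d]
// c AS c1 and c AS c2 both map directly onto the cached alias c, so they share
// one attribute reference forced on the cached plan; (c * 2) AS d has no direct
// match and is rewritten against that same reference.
val cached = spark.range(10).select($"id", ($"id" + 1).as("c")).cache()
val reused = cached.select($"c".as("c1"), $"c".as("c2"), ($"c" * 2).as("d"))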
hints.foldRight[LogicalPlan](cachedPlan) { case (hint, p) => @@ -486,4 +772,90 @@ object CacheManager extends Logging { case _ => logTrace(f) } } + val inMemoryRelationExtractor: LogicalPlan => InMemoryRelation = + plan => plan.collectLeaves().head.asInstanceOf[InMemoryRelation] +} + +object CompatibilityChecker { + def unapply(data: (LogicalPlan, Seq[Filter], LogicalPlan)): Option[(Seq[Filter], Project)] = { + val(incomingChild, incomingFilterChain, cachedPlan) = data + cachedPlan match { + case p: Project if incomingChild.sameResult(p.child) => Option(incomingFilterChain -> p) + + case f: Filter => + val collectedFilters = mutable.ListBuffer[Filter](f) + var projectFound: Option[Project] = None + var child: LogicalPlan = f.child + var keepChecking = true + while (keepChecking) { + child match { + case x: Filter => child = x.child + collectedFilters += x + case p: Project => projectFound = Option(p) + keepChecking = false + case _ => keepChecking = false + } + } + if (collectedFilters.size <= incomingFilterChain.size && + projectFound.exists(_.child.sameResult(incomingChild))) { + val (residualIncomingFilterChain, otherFilterChain) = incomingFilterChain.splitAt( + incomingFilterChain.size - collectedFilters.size) + val isCompatible = if (otherFilterChain.isEmpty) { + true + } else { + // the other filter chain must be equal to the collected filter chain + // But we need to transform the collected Filter chain such that it is below + // the project of the cached plan, we have found, as the incoming filters are also below + // the incoming project. + val mappingFilterExpr = AttributeMap(projectFound.get.projectList.flatMap { + case _: Attribute => Seq.empty[(Attribute, (NamedExpression, Expression))] + case al: Alias => Seq(al.toAttribute -> (al, al.child)) + }) + + val modifiedCdFilters = collectedFilters.map(f => + f.copy(condition = EarlyCollapseProject.expressionRemapper( + f.condition, mappingFilterExpr))).toSeq + val chainedFilter1 = combineFilterChainUsingRoot(otherFilterChain, + EmptyRelation(incomingChild.output)) + val chainedFilter2 = combineFilterChainUsingRoot(modifiedCdFilters, + EmptyRelation(projectFound.map(_.child).get.output)) + chainedFilter1.sameResult(chainedFilter2) + } + if (isCompatible) { + Option(residualIncomingFilterChain -> projectFound.get) + } else { + None + } + } else { + None + } + + case _ => None + } + } + + def combineFilterChainUsingRoot(filters: Seq[Filter], root: LogicalPlan): Filter = { + val lastFilterNode = filters.last + val lastFilterMod = lastFilterNode.copy(child = root) + filters.dropRight(1).foldRight(lastFilterMod)((f, c) => f.copy(child = c)) + } + + def extractChildIgnoringFiltersFromIncomingProject(incomingProject: Project): + (LogicalPlan, Seq[Filter]) = { + val collectedFilters = mutable.ListBuffer[Filter]() + var child: LogicalPlan = incomingProject.child + var keepChecking = true + while (keepChecking) { + child match { + case f: Filter => child = f.child + collectedFilters += f + case _ => keepChecking = false + } + } + (child, collectedFilters.toSeq) + } + + case class EmptyRelation(output: Seq[Attribute]) extends LeafNode { + override def maxRows: Option[Long] = Some(0) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala new file mode 100644 index 000000000000..5bd27b677c37 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/EarlyCollapseProject.scala 
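Before the new rule's source below, a hedged illustration of the plan shape EarlyCollapseProject is meant to produce; the single remaining Project is what the golden-file updates at the top of this diff reflect (spark and spark.implicits._ assumed):

// Each withColumn/select used to leave its own Project in the analyzed plan;
// with the rule applied right after analysis they are expected to collapse
// into one Project before the optimizer ever runs.
val df = spark.range(5)
  .withColumn("a", $"id" + 1)
  .withColumn("b", $"id" * 2)
  .select($"a", $"b")
println(df.queryExecution.analyzed)   // expected: a single Project over Range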
@@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.analysis + +import scala.collection.mutable +import scala.util.{Failure, Success, Try} + +import org.apache.spark.sql.Dataset +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, Expression, NamedExpression, UserDefinedExpression} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.{Metadata, MetadataBuilder} +import org.apache.spark.util.Utils + + +private[sql] object EarlyCollapseProject extends Rule[LogicalPlan] { + val expressionRemapper: (Expression, AttributeMap[(NamedExpression, Expression)]) => Expression = + (expr, mappings) => { + expr transformUp { + case attr: AttributeReference => mappings.get(attr).map { + case (_, expr) => expr + }.getOrElse(attr) + } + } + def apply(logicalPlan: LogicalPlan): LogicalPlan = + logicalPlan match { + case newP @ Project(_, p : Project) if checkEarlyCollapsePossible(newP, p) => + collapseProjectEarly(newP, p) getOrElse newP + + case newP@Project(_, f@Filter(_, filterChild: UnaryNode)) => + // check if its case of nested filters followed by project + val filterNodes = mutable.ListBuffer(f) + var projectAtEnd: Option[Project] = None + var keepGoing = true + var currentChild = filterChild + while (keepGoing) { + currentChild match { + case p: Project => projectAtEnd = Option(p) + keepGoing = false + case filter @ Filter(expr, u: UnaryNode) if expr.deterministic => + filterNodes += filter + currentChild = u + case _ => keepGoing = false + } + } + if (projectAtEnd.isDefined) { + val p = projectAtEnd.get + if (checkEarlyCollapsePossible(newP, p)) { + val newProjOpt = collapseProjectEarly(newP, p) + val mappingFilterExpr = AttributeMap(p.projectList.flatMap(ne => ne match { + case _: Attribute => Seq.empty[(Attribute, (NamedExpression, Expression))] + case al: Alias => Seq(al.toAttribute -> (al, al.child)) + })) + newProjOpt.map(collapsedProj => { + val lastFilterNode = filterNodes.last + val lastFilterMod = lastFilterNode.copy( + condition = expressionRemapper(lastFilterNode.condition, mappingFilterExpr), + child = collapsedProj.child) + val filterChain = filterNodes.dropRight(1).foldRight(lastFilterMod)((f, c) => + f.copy(condition = expressionRemapper(f.condition, mappingFilterExpr), child = c)) + collapsedProj.copy(child = filterChain) + }).getOrElse { + newP + } + } else { + newP + } + } else { + newP + } + + case _ => logicalPlan + } + + private def checkEarlyCollapsePossible(newP: Project, p: Project): Boolean = + newP.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + p.getTagValue(LogicalPlan.PLAN_ID_TAG).isEmpty && + !p.child.isInstanceOf[Window] && + 
p.projectList.forall(_.collectFirst { + case ex if !ex.deterministic => ex + case ex: UserDefinedExpression => ex + }.isEmpty) + + private def transferMetadata(from: Attribute, to: NamedExpression): NamedExpression = + if (from.metadata == Metadata.empty) { + to + } else { + to match { + case al: Alias => + val newMdBuilder = new MetadataBuilder().withMetadata(from.metadata) + val newMd = newMdBuilder.build() + al.copy()(exprId = al.exprId, qualifier = from.qualifier, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys, + explicitMetadata = Option(newMd)) + + case attr: AttributeReference => attr.copy(metadata = from.metadata)( + exprId = attr.exprId, qualifier = from.qualifier) + } + } + + + + def collapseProjectEarly(newP: Project, p: Project): Option[Project] = { + val child = p.child + val newProjList = newP.projectList + val projList = p.projectList + val childOutput = child.outputSet + val attribsToExprInProj = AttributeMap( + projList.flatMap(ne => ne match { + case al@Alias(child, _) => child match { + case attr: Attribute if childOutput.contains(attr) => + Seq(al.toAttribute -> (al, transferMetadata(al.toAttribute, attr))) + + case _ => Seq(al.toAttribute -> (al, child)) + } + + case _ => Seq.empty[(Attribute, (NamedExpression, Expression))] + })) + + val remappedNewProjListResult = Try { + newProjList.map { + case attr: AttributeReference => attribsToExprInProj.get(attr).map { + case (al : Alias, _) => if (attr.name == al.name) { + transferMetadata(attr, al) + } else { + // To Handle the case of change of (Caps/lowercase) via toSchema resulting + // in rename + transferMetadata(attr, al.copy(name = attr.name)( + exprId = al.exprId, qualifier = al.qualifier, + explicitMetadata = al.explicitMetadata, + nonInheritableMetadataKeys = al.nonInheritableMetadataKeys)) + } + }.getOrElse(attr) + + case ne => expressionRemapper(ne, attribsToExprInProj).asInstanceOf[NamedExpression] + } + } + + remappedNewProjListResult match { + case Success(remappedNewProjList) => + val newProject = Project(remappedNewProjList, child) + val droppedNamedExprs = projList.filter(ne => + remappedNewProjList.forall(_.toAttribute != ne.toAttribute)) + val prevDroppedColsPart1 = p.getTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS). 
+ getOrElse(Seq.empty) + // remove any attribs which have been added back in the new project list + val prevDroppedColsPart2 = prevDroppedColsPart1.filterNot(x => + remappedNewProjList.exists(y => y.toAttribute == x.toAttribute || y.name == x.name)) + val prevDroppedColsFinal = prevDroppedColsPart2.filterNot(x => + droppedNamedExprs.exists(y => y == x || y.name == x.name)) + val newDroppedList = droppedNamedExprs ++ prevDroppedColsFinal + newProject.copyTagsFrom(p) + // remove the datasetId copied from current P due to above copy + newProject.unsetTagValue(Dataset.DATASET_ID_TAG) + // use the dataset id of the incoming new project + newP.getTagValue(Dataset.DATASET_ID_TAG).foreach(map => + newProject.setTagValue(Dataset.DATASET_ID_TAG, map.clone())) + newProject.unsetTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS) + if (newDroppedList.nonEmpty) { + newProject.setTagValue(LogicalPlan.DROPPED_NAMED_EXPRESSIONS, newDroppedList) + } + Option(newProject) + + case Failure(x) => if (Utils.isTesting) { + throw x + } else { + None + } + } + + } +} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 7b0ce3e59263..f5a63bc38e09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -62,12 +62,16 @@ case class AnalyzeColumnCommand( private def analyzeColumnInCachedData(plan: LogicalPlan, sparkSession: SparkSession): Boolean = { val cacheManager = sparkSession.sharedState.cacheManager val df = Dataset.ofRows(sparkSession, plan) - cacheManager.lookupCachedData(df).map { cachedData => - val columnsToAnalyze = getColumnsToAnalyze( - tableIdent, cachedData.cachedRepresentation, columnNames, allColumns) - cacheManager.analyzeColumnCacheQuery(sparkSession, cachedData, columnsToAnalyze) - cachedData - }.isDefined + cacheManager.lookupCachedData(df).exists { cachedData => + if (cachedData.cachedRepresentation.isRight) { + val columnsToAnalyze = getColumnsToAnalyze( + tableIdent, cachedData.cachedRepresentation.merge, columnNames, allColumns) + cacheManager.analyzeColumnCacheQuery(sparkSession, cachedData, columnsToAnalyze) + true + } else { + false + } + } } private def analyzeColumnInTempView(plan: LogicalPlan, sparkSession: SparkSession): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index 7acd1cb0852b..72e7b19410c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -240,7 +240,7 @@ object CommandUtils extends Logging { // Analyzes a catalog view if the view is cached val table = sparkSession.table(tableIdent.quotedString) val cacheManager = sparkSession.sharedState.cacheManager - if (cacheManager.lookupCachedData(table).isDefined) { + if (cacheManager.lookupCachedData(table).exists(_.cachedRepresentation.isRight)) { if (!noScan) { // To collect table stats, materializes an underlying columnar RDD table.count() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index ee0074dfe61b..d889a475ed1e 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeed import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -201,7 +202,8 @@ case class AlterTableRenameCommand( // If `optStorageLevel` is defined, the old table was cached. val optCachedData = sparkSession.sharedState.cacheManager.lookupCachedData( sparkSession.table(oldName.unquotedString)) - val optStorageLevel = optCachedData.map(_.cachedRepresentation.cacheBuilder.storageLevel) + val optStorageLevel = optCachedData.map(_.cachedRepresentation. + fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder.storageLevel) if (optStorageLevel.isDefined) { CommandUtils.uncacheTableOrView(sparkSession, oldName) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 7a668b75c3c7..3bdb0e870a66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.connector.read.LocalScan import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} -import org.apache.spark.sql.execution.{FilterExec, InSubqueryExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.{CacheManager, FilterExec, InSubqueryExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, LogicalRelation, PushableColumnAndNestedColumn} import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} @@ -85,8 +85,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) val cache = session.sharedState.cacheManager.lookupCachedData(session, v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) - if (cache.isDefined) { - val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel + if (cache.exists(_.cachedRepresentation.isRight)) { + val cacheLevel = cache.get.cachedRepresentation. 
+ fold(CacheManager.inMemoryRelationExtractor, identity).cacheBuilder.storageLevel Some(cacheLevel) } else { None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 4660970814e2..62721e4d6d16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.{ColumnarRule, CommandExecutionMode, QueryExecution, SparkOptimizer, SparkPlanner, SparkSqlParser} import org.apache.spark.sql.execution.adaptive.AdaptiveRulesHolder import org.apache.spark.sql.execution.aggregate.{ResolveEncodersInScalaAgg, ScalaUDAF} -import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin +import org.apache.spark.sql.execution.analysis.{DetectAmbiguousSelfJoin, EarlyCollapseProject} import org.apache.spark.sql.execution.command.CommandCheck import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.{TableCapabilityCheck, V2SessionCatalog} @@ -217,6 +217,9 @@ abstract class BaseSessionStateBuilder( ReplaceCharWithVarchar +: customPostHocResolutionRules + override val postAnalysisEarlyOptimizationRules: Seq[Rule[LogicalPlan]] = + EarlyCollapseProject +: Nil + override val extendedCheckRules: Seq[LogicalPlan => Unit] = PreWriteCheck +: PreReadCheck +: diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out index b3bfec1fe3a8..4b9df1a1d641 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out @@ -114,14 +114,13 @@ org.apache.spark.sql.AnalysisException -- !query SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY lca -- !query analysis -Project [lca#x, (lca#x + 1) AS col#x] -+- Project [k#x, k#x AS lca#x] - +- Aggregate [k#x], [k#x] - +- SubqueryAlias v1 - +- View (`v1`, [a#x, b#x, k#x]) - +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] - +- SubqueryAlias t - +- LocalRelation [a#x, b#x, k#x] +Project [k#x AS lca#x, (k#x + 1) AS col#x] ++- Aggregate [k#x], [k#x] + +- SubqueryAlias v1 + +- View (`v1`, [a#x, b#x, k#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] + +- SubqueryAlias t + +- LocalRelation [a#x, b#x, k#x] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out index 857c574af3d2..644cdc288721 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out @@ -71,7 +71,26 @@ CreateViewCommand `nt4`, select * from values SELECT * FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View 
(`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] + + +-- !query +SELECT * FROM nt1 natural join nt2 where k = "one" +-- !query analysis +Project [k#x, v1#x, v2#x] ++- Filter (k#x = one) +- Join Inner, (k#x = k#x) :- SubqueryAlias nt1 : +- View (`nt1`, [k#x, v1#x]) @@ -87,27 +106,6 @@ Project [k#x, v1#x, v2#x] +- LocalRelation [k#x, v2#x] --- !query -SELECT * FROM nt1 natural join nt2 where k = "one" --- !query analysis -Project [k#x, v1#x, v2#x] -+- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] - - -- !query SELECT * FROM nt1 natural left join nt2 order by v1, v2 -- !query analysis @@ -174,20 +172,19 @@ Aggregate [count(1) AS count(1)#xL] SELECT k FROM nt1 natural join nt2 -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -195,27 +192,6 @@ SELECT k FROM nt1 natural join nt2 where k = "one" -- !query analysis Project [k#x] +- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] - - --- !query -SELECT nt1.* FROM nt1 natural join nt2 --- !query analysis -Project [k#x, v1#x] -+- Project [k#x, v1#x, v2#x] +- Join Inner, (k#x = k#x) :- SubqueryAlias nt1 : +- View (`nt1`, [k#x, v1#x]) @@ -231,24 +207,42 @@ Project [k#x, v1#x] +- LocalRelation [k#x, v2#x] +-- !query +SELECT nt1.* FROM nt1 natural join nt2 +-- !query analysis +Project [k#x, v1#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project 
[k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] + + -- !query SELECT nt2.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -299,60 +293,57 @@ Project [k#x] SELECT nt1.*, nt2.* FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT *, nt2.k FROM nt1 natural join nt2 -- !query analysis Project [k#x, v1#x, v2#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 natural join nt2 -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 
- +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -403,20 +394,19 @@ SELECT nt1.k, nt2.k FROM nt1 natural join nt2 where k = "one" -- !query analysis Project [k#x, k#x] +- Filter (k#x = one) - +- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -510,56 +500,54 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException SELECT * FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, v2#x, v3#x] -+- Project [k#x, v1#x, v2#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x, v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x, v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x, v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x, v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x, v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x, v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.* FROM nt1 natural join nt2 natural join nt3 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, 
v2#x, v3#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x, v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x, v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x, v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x, v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x, v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x, v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query @@ -620,61 +608,59 @@ Project [k#x, v1#x, v2#x, k#x, v3#x] SELECT * FROM nt1 natural join nt2 join nt3 on nt2.k = nt3.k -- !query analysis Project [k#x, v1#x, v2#x, k#x, v3#x] -+- Project [k#x, v1#x, v2#x, k#x, v3#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- SubqueryAlias nt1 - : : +- View (`nt1`, [k#x, v1#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : +- Project [k#x, v1#x] - : : +- SubqueryAlias nt1 - : : +- LocalRelation [k#x, v1#x] - : +- SubqueryAlias nt2 - : +- View (`nt2`, [k#x, v2#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : +- Project [k#x, v2#x] - : +- SubqueryAlias nt2 - : +- LocalRelation [k#x, v2#x] - +- SubqueryAlias nt3 - +- View (`nt3`, [k#x, v3#x]) - +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - +- Project [k#x, v3#x] - +- SubqueryAlias nt3 - +- LocalRelation [k#x, v3#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- SubqueryAlias nt1 + : : +- View (`nt1`, [k#x, v1#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : +- Project [k#x, v1#x] + : : +- SubqueryAlias nt1 + : : +- LocalRelation [k#x, v1#x] + : +- SubqueryAlias nt2 + : +- View (`nt2`, [k#x, v2#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : +- Project [k#x, v2#x] + : +- SubqueryAlias nt2 + : +- LocalRelation [k#x, v2#x] + +- SubqueryAlias nt3 + +- View (`nt3`, [k#x, v3#x]) + +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + +- Project [k#x, v3#x] + +- SubqueryAlias nt3 + +- LocalRelation [k#x, v3#x] -- !query SELECT nt1.*, nt2.*, nt3.*, nt4.* FROM nt1 natural join nt2 natural join nt3 natural join nt4 -- !query analysis Project [k#x, v1#x, k#x, v2#x, k#x, v3#x, k#x, v4#x] -+- Project [k#x, v1#x, v2#x, v3#x, v4#x, k#x, k#x, k#x] - +- Join Inner, (k#x = k#x) - :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] - : +- Join Inner, (k#x = k#x) - : :- Project [k#x, 
v1#x, v2#x, k#x] - : : +- Join Inner, (k#x = k#x) - : : :- SubqueryAlias nt1 - : : : +- View (`nt1`, [k#x, v1#x]) - : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : : : +- Project [k#x, v1#x] - : : : +- SubqueryAlias nt1 - : : : +- LocalRelation [k#x, v1#x] - : : +- SubqueryAlias nt2 - : : +- View (`nt2`, [k#x, v2#x]) - : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - : : +- Project [k#x, v2#x] - : : +- SubqueryAlias nt2 - : : +- LocalRelation [k#x, v2#x] - : +- SubqueryAlias nt3 - : +- View (`nt3`, [k#x, v3#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] - : +- Project [k#x, v3#x] - : +- SubqueryAlias nt3 - : +- LocalRelation [k#x, v3#x] - +- SubqueryAlias nt4 - +- View (`nt4`, [k#x, v4#x]) - +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] - +- Project [k#x, v4#x] - +- SubqueryAlias nt4 - +- LocalRelation [k#x, v4#x] ++- Join Inner, (k#x = k#x) + :- Project [k#x, v1#x, v2#x, v3#x, k#x, k#x] + : +- Join Inner, (k#x = k#x) + : :- Project [k#x, v1#x, v2#x, k#x] + : : +- Join Inner, (k#x = k#x) + : : :- SubqueryAlias nt1 + : : : +- View (`nt1`, [k#x, v1#x]) + : : : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : : : +- Project [k#x, v1#x] + : : : +- SubqueryAlias nt1 + : : : +- LocalRelation [k#x, v1#x] + : : +- SubqueryAlias nt2 + : : +- View (`nt2`, [k#x, v2#x]) + : : +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + : : +- Project [k#x, v2#x] + : : +- SubqueryAlias nt2 + : : +- LocalRelation [k#x, v2#x] + : +- SubqueryAlias nt3 + : +- View (`nt3`, [k#x, v3#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v3#x as int) AS v3#x] + : +- Project [k#x, v3#x] + : +- SubqueryAlias nt3 + : +- LocalRelation [k#x, v3#x] + +- SubqueryAlias nt4 + +- View (`nt4`, [k#x, v4#x]) + +- Project [cast(k#x as string) AS k#x, cast(v4#x as int) AS v4#x] + +- Project [k#x, v4#x] + +- SubqueryAlias nt4 + +- LocalRelation [k#x, v4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index 93f2e240a019..cf1f0ea216e1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[0] AS 2012#xL, __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, sum(coursesales.earnings)#xL, 2012, 2013, 0, 0) AS __pivot_sum(coursesales.earnings) AS `sum(coursesales.earnings)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, sum(earnings#x) AS sum(coursesales.earnings)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_avg(earnings)#x, Java_sum(earnings)#xL, Java_avg(earnings)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] - +- Aggregate [year#x], [year#x, 
pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -124,18 +121,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, 
cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -147,18 +143,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_sum(earnings)#xL, dotNET_min(year)#x, Java_sum(earnings)#xL, Java_min(year)#x] -+- Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] - +- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] - +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[0] AS dotNET_min(year)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x[1] AS Java_min(year)#x] ++- Aggregate [pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS 
__pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.year)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.year) AS `min(__auto_generated_subquery_name.year)`#x] + +- Aggregate [course#x], [course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(year#x) AS min(__auto_generated_subquery_name.year)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -172,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS 1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS 2#xL] ++- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, 1, 2, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- 
Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -204,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_sum(earnings)#xL, dotNET_min(s)#x, Java_sum(earnings)#xL, Java_min(s)#x] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[0] AS dotNET_min(s)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x[1] AS Java_min(s)#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.s)#x, dotNET, Java, 0, 0) AS __pivot_min(__auto_generated_subquery_name.s) AS `min(__auto_generated_subquery_name.s)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, min(s#x) AS min(__auto_generated_subquery_name.s)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- 
SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -236,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[0] AS dotNET#xL, __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL, dotNET, Java, 0, 0) AS __pivot_sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s)) AS `sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum((earnings#x * s#x)) AS sum((__auto_generated_subquery_name.earnings * __auto_generated_subquery_name.s))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- 
SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -266,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS 2012_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS 2013_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS 2012_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -289,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS 
`avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[0] AS firstYear_s#xL, __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[0] AS firstYear_a#x, __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(__auto_generated_subquery_name.e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.e) AS s AS `sum(__auto_generated_subquery_name.e) AS s`#x, pivotfirst(y#x, avg(__auto_generated_subquery_name.e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.e) AS a AS `avg(__auto_generated_subquery_name.e) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(__auto_generated_subquery_name.e) AS s#xL, avg(e#x) AS avg(__auto_generated_subquery_name.e) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -373,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_CEIL(sum(earnings))#xL, dotNET_a1#x, Java_CEIL(sum(earnings))#xL, Java_a1#x] -+- Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, 
__pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -421,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS 
`sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, 2012}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -453,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS 
sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS c1#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -610,25 +597,24 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, [1, 1]#xL, [2, 2]#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] - +- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, 
m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS [1, 1]#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS [2, 2]#xL] ++- Aggregate [year#x], [year#x, pivotfirst(a#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,1], [2,2], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -642,25 +628,24 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, [2, 2]}#xL] ++- Aggregate [year#x], [year#x, 
pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -674,25 +659,24 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, {1, a}#xL, {2, b}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {1, a}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {2, b}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(s#x, sum(__auto_generated_subquery_name.earnings)#xL, [1,a], [2,b], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, 
[course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -706,25 +690,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] -+- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS {Java, {2, b}}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, sum(__auto_generated_subquery_name.earnings)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, 
earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -779,15 +762,14 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] -+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[0] AS dotNET#xL, __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, sum(__auto_generated_subquery_name.Earnings)#xL, dotNET, Java, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.Earnings) AS `sum(__auto_generated_subquery_name.Earnings)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, sum(Earnings#x) AS sum(__auto_generated_subquery_name.Earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, a AS a#x, z AS z#x, b AS b#x, y AS y#x, c AS c#x, x AS x#x, d AS d#x, w AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index 37ec8291c4e4..bb4a3c54c797 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -415,12 +415,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL INNER JOIN J2_TBL USING (i) -- !query analysis 
Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -428,12 +427,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -460,12 +458,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL NATURAL JOIN J2_TBL -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -473,16 +470,15 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS a#x, k#x AS d#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS a#x, k#x AS d#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -490,16 +486,15 @@ SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a) -- !query analysis Project [ AS xxx#x, a#x, b#x, c#x, d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS d#x, k#x AS a#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- 
Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS d#x, k#x AS a#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -573,12 +568,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -586,12 +580,11 @@ SELECT '' AS `xxx`, * FROM J1_TBL RIGHT JOIN J2_TBL USING (i) -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -630,12 +623,11 @@ SELECT '' AS `xxx`, * -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] +- Filter (k#x = 1) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet + +- Join LeftOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -644,12 +636,11 @@ SELECT '' AS `xxx`, * -- !query analysis Project [ AS xxx#x, i#x, j#x, t#x, k#x] +- Filter (i#x = 1) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet + +- Join LeftOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -729,17 +720,16 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [name#x, n#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias 
spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -750,16 +740,15 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -770,16 +759,15 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -789,17 +777,16 @@ FULL JOIN (SELECT * FROM t3) s3 USING (name) -- !query analysis -Project [name#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -809,16 +796,15 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, 
s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -828,16 +814,15 @@ NATURAL LEFT JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -846,17 +831,16 @@ SELECT * FROM NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -868,22 +852,21 @@ NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : 
+- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -894,23 +877,22 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [name#x, n#x AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -923,25 +905,24 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- 
Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -954,25 +935,24 @@ NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index c74124402c55..1f53d5705885 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -415,12 +415,11 @@ SELECT udf('') AS `xxx`, udf(i) AS i, udf(j), udf(t) AS t, udf(k) FROM J1_TBL INNER JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS i#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS t#x, 
cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -428,12 +427,11 @@ SELECT udf(udf('')) AS `xxx`, udf(i), udf(j) AS j, udf(t), udf(k) AS k FROM J1_TBL JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast(cast(udf(cast( as string)) as string) as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS j#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS k#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -460,12 +458,11 @@ SELECT udf(udf('')) AS `xxx`, udf(i), udf(j), udf(t), udf(k) FROM J1_TBL NATURAL JOIN J2_TBL -- !query analysis Project [cast(udf(cast(cast(udf(cast( as string)) as string) as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join Inner, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -473,16 +470,15 @@ SELECT udf('') AS `xxx`, udf(udf(udf(a))) AS a, udf(b), udf(c), udf(d) FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(cast(udf(cast(cast(udf(cast(a#x as string)) as int) as string)) as int) as string)) as int) AS a#x, cast(udf(cast(b#x as string)) as int) AS udf(b)#x, cast(udf(cast(c#x as string)) as string) AS udf(c)#x, cast(udf(cast(d#x as string)) as int) AS udf(d)#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS a#x, k#x AS d#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS 
b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS a#x, k#x AS d#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -490,16 +486,15 @@ SELECT udf('') AS `xxx`, udf(udf(a)), udf(udf(b)), udf(udf(c)) AS c, udf(udf(udf FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(cast(udf(cast(a#x as string)) as int) as string)) as int) AS udf(udf(a))#x, cast(udf(cast(cast(udf(cast(b#x as string)) as int) as string)) as int) AS udf(udf(b))#x, cast(udf(cast(cast(udf(cast(c#x as string)) as string) as string)) as string) AS c#x, cast(udf(cast(cast(udf(cast(cast(udf(cast(d#x as string)) as int) as string)) as int) as string)) as int) AS d#x] -+- Project [a#x, b#x, c#x, d#x] - +- Join Inner, (a#x = a#x) - :- SubqueryAlias t1 - : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] - : +- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias t2 - +- Project [i#x AS d#x, k#x AS a#x] - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join Inner, (a#x = a#x) + :- SubqueryAlias t1 + : +- Project [i#x AS a#x, j#x AS b#x, t#x AS c#x] + : +- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias t2 + +- Project [i#x AS d#x, k#x AS a#x] + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -575,12 +570,11 @@ SELECT udf('') AS `xxx`, udf(udf(i)), udf(j), udf(t), udf(k) FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(cast(udf(cast(i#x as string)) as int) as string)) as int) AS udf(udf(i))#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -588,12 +582,11 @@ SELECT udf('') AS `xxx`, udf(i), udf(udf(j)), udf(t), udf(k) FROM J1_TBL RIGHT JOIN J2_TBL USING (i) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(cast(udf(cast(j#x as string)) as int) as string)) as int) AS udf(udf(j))#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] -+- Project [i#x, j#x, t#x, k#x] - +- Join RightOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet ++- Join RightOuter, (i#x = i#x) + :- SubqueryAlias 
spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -634,12 +627,11 @@ SELECT udf('') AS `xxx`, udf(i), udf(j), udf(t), udf(udf(k)) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(cast(udf(cast(k#x as string)) as int) as string)) as int) AS udf(udf(k))#x] +- Filter (cast(udf(cast(k#x as string)) as int) = 1) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet + +- Join LeftOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -648,12 +640,11 @@ SELECT udf('') AS `xxx`, udf(i), udf(j), udf(t), udf(k) -- !query analysis Project [cast(udf(cast( as string)) as string) AS xxx#x, cast(udf(cast(i#x as string)) as int) AS udf(i)#x, cast(udf(cast(j#x as string)) as int) AS udf(j)#x, cast(udf(cast(t#x as string)) as string) AS udf(t)#x, cast(udf(cast(k#x as string)) as int) AS udf(k)#x] +- Filter (cast(udf(cast(cast(udf(cast(i#x as string)) as int) as string)) as int) = cast(udf(cast(1 as string)) as int)) - +- Project [i#x, j#x, t#x, k#x] - +- Join LeftOuter, (i#x = i#x) - :- SubqueryAlias spark_catalog.default.j1_tbl - : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet - +- SubqueryAlias spark_catalog.default.j2_tbl - +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet + +- Join LeftOuter, (i#x = i#x) + :- SubqueryAlias spark_catalog.default.j1_tbl + : +- Relation spark_catalog.default.j1_tbl[i#x,j#x,t#x] parquet + +- SubqueryAlias spark_catalog.default.j2_tbl + +- Relation spark_catalog.default.j2_tbl[i#x,k#x] parquet -- !query @@ -733,17 +724,16 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) -- !query analysis -Project [name#x, n#x, n#x, n#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, n#x, n#x, n#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ 
-754,16 +744,15 @@ INNER JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -774,16 +763,15 @@ LEFT JOIN USING (name) -- !query analysis Project [name#x, n#x, n#x] -+- Project [name#x, n#x, n#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -793,17 +781,16 @@ FULL JOIN (SELECT * FROM t3) s3 USING (name) -- !query analysis -Project [cast(udf(cast(name#x as string)) as string) AS udf(name)#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS udf(udf(n))#x, cast(udf(cast(n#x as string)) as int) AS udf(n)#x] -+- Project [coalesce(name#x, name#x) AS name#x, n#x, n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [cast(udf(cast(coalesce(name#x, name#x) as string)) as string) AS udf(name)#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS udf(udf(n))#x, cast(udf(cast(n#x as string)) as int) AS udf(n)#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -813,16 +800,15 @@ NATURAL INNER JOIN (SELECT udf(name) as name, udf(udf(n)) as s3_n, 
udf(3) as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -832,16 +818,15 @@ NATURAL LEFT JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join LeftOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join LeftOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -850,17 +835,16 @@ SELECT * FROM NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(udf(n)) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias 
spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -872,22 +856,21 @@ NATURAL INNER JOIN (SELECT udf(udf(udf(name))) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 -- !query analysis Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join Inner, (name#x = name#x) - :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join Inner, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, 1 AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet ++- Join Inner, (name#x = name#x) + :- Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join Inner, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, 1 AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, 3 AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -898,23 +881,22 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT udf(udf(name)) as name, 
udf(n) as s3_n, udf(3) as s3_2 FROM t3) s3 -- !query analysis -Project [name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] - +- Join FullOuter, (name#x = name#x) - :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] - : +- Join FullOuter, (name#x = name#x) - : :- SubqueryAlias s1 - : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] - : : +- SubqueryAlias spark_catalog.default.t1 - : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - : +- SubqueryAlias s2 - : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x, s3_n#x, s3_2#x] ++- Join FullOuter, (name#x = name#x) + :- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s1_1#x, s2_n#x, s2_2#x] + : +- Join FullOuter, (name#x = name#x) + : :- SubqueryAlias s1 + : : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s1_n#x, cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS s1_1#x] + : : +- SubqueryAlias spark_catalog.default.t1 + : : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + : +- SubqueryAlias s2 + : +- Project [cast(udf(cast(name#x as string)) as string) AS name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, cast(udf(cast(2 as string)) as int) AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [cast(udf(cast(cast(udf(cast(name#x as string)) as string) as string)) as string) AS name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x, cast(udf(cast(3 as string)) as int) AS s3_2#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -927,25 +909,24 @@ NATURAL FULL JOIN (SELECT name, udf(udf(n)) as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, cast(udf(cast(cast(udf(cast(s1_n#x as string)) as int) as string)) as int) AS udf(udf(s1_n))#x, cast(udf(cast(s2_n#x as string)) as int) AS udf(s2_n)#x, cast(udf(cast(s3_n#x as string)) as int) AS udf(s3_n)#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, 
cast(udf(cast(n#x as string)) as int) AS s2_n#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, cast(udf(cast(cast(udf(cast(s1_n#x as string)) as int) as string)) as int) AS udf(udf(s1_n))#x, cast(udf(cast(s2_n#x as string)) as int) AS udf(s2_n)#x, cast(udf(cast(s3_n#x as string)) as int) AS udf(s3_n)#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s2_n#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query @@ -958,25 +939,24 @@ NATURAL FULL JOIN (SELECT name, udf(n) as s3_n FROM t3) as s3 ) ss2 -- !query analysis -Project [name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] -+- Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s1 - : +- Project [name#x, n#x AS s1_n#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[name#x,n#x] parquet - +- SubqueryAlias ss2 - +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] - +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] - +- Join FullOuter, (name#x = name#x) - :- SubqueryAlias s2 - : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] - : +- SubqueryAlias spark_catalog.default.t2 - : +- Relation spark_catalog.default.t2[name#x,n#x] parquet - +- SubqueryAlias s3 - +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] - +- SubqueryAlias spark_catalog.default.t3 - +- Relation spark_catalog.default.t3[name#x,n#x] parquet +Project [coalesce(name#x, name#x) AS name#x, s1_n#x, s2_n#x, s2_2#x, s3_n#x] ++- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s1 + : +- Project [name#x, n#x AS s1_n#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[name#x,n#x] parquet + +- SubqueryAlias ss2 + +- Project [name#x, s2_n#x, s2_2#x, s3_n#x] + +- Project [coalesce(name#x, name#x) AS name#x, s2_n#x, s2_2#x, s3_n#x] + +- Join FullOuter, (name#x = name#x) + :- SubqueryAlias s2 + : +- Project [name#x, cast(udf(cast(cast(udf(cast(n#x as string)) as int) as string)) as int) AS s2_n#x, 2 AS s2_2#x] + : +- SubqueryAlias spark_catalog.default.t2 + : +- Relation spark_catalog.default.t2[name#x,n#x] parquet + +- SubqueryAlias s3 + +- Project [name#x, cast(udf(cast(n#x as string)) as int) AS s3_n#x] + +- SubqueryAlias spark_catalog.default.t3 + +- Relation spark_catalog.default.t3[name#x,n#x] parquet -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out index 5fc413c66326..516b05208b1f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out @@ -38,20 +38,19 @@ SELECT * FROM nt1 natural join nt2 where udf(k) = "one" -- !query analysis Project [k#x, v1#x, v2#x] +- Filter (cast(udf(cast(k#x as string)) as string) = one) - +- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] + +- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index 5cfa86309f6d..c775116973c2 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -59,18 +59,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [udf(year)#x, dotNET#xL, Java#xL] -+- Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [udf(year)#x], [udf(year)#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [udf(year)#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [udf(year)#x], [udf(year)#x, 
pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [udf(year)#x, course#x], [udf(year)#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(year#x as string)) as int) AS udf(year)#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -80,16 +79,15 @@ PIVOT ( FOR year IN (2012, 2013) ) -- !query analysis -Project [course#x, 2012#xL, 2013#xL] -+- Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] - +- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [course#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 2012#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2013#xL] ++- Aggregate [course#x], [course#x, pivotfirst(year#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x], [course#x, year#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -101,18 +99,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(avg(earnings))#x, Java_udf(sum(earnings))#xL, Java_udf(avg(earnings))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS 
`CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[0] AS dotNET_udf(avg(earnings))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x[1] AS Java_udf(avg(earnings))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(avg(earnings) as string)) AS DOUBLE) AS `CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(avg(earnings#x) as string)) as double) AS CAST(udf(cast(avg(earnings) as string)) AS DOUBLE)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -124,18 +121,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET#xL, Java#xL] -+- Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) 
AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [cast(udf(cast(course#x as string)) as string) AS course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -147,18 +143,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [dotNET_udf(sum(udf(earnings)))#xL, dotNET_udf(min(year))#x, Java_udf(sum(udf(earnings)))#xL, Java_udf(min(year))#x] -+- Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] - +- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 
0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] - +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[0] AS dotNET_udf(min(year))#x, __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(udf(earnings)))#xL, __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x[1] AS Java_udf(min(year))#x] ++- Aggregate [pivotfirst(course#x, CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT) AS `CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(year) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(year) as string)) AS INT) AS `CAST(udf(cast(min(year) as string)) AS INT)`#x] + +- Aggregate [course#x], [course#x, cast(udf(cast(sum(cast(udf(cast(earnings#x as string)) as int)) as string)) as bigint) AS CAST(udf(cast(sum(cast(udf(cast(earnings as string)) as int)) as string)) AS BIGINT)#xL, cast(udf(cast(min(year#x) as string)) as int) AS CAST(udf(cast(min(year) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -172,25 +167,24 @@ PIVOT ( FOR s IN (1, 2) ) -- !query analysis -Project [course#x, year#x, 1#xL, 2#xL] -+- Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] - +- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [course#x, year#x, 
s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [course#x, year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS 1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS 2#xL] ++- Aggregate [course#x, year#x], [course#x, year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, 1, 2, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [course#x, year#x, s#x], [course#x, year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, cast(udf(cast(s#x as string)) as int) AS s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -204,25 +198,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(sum(earnings))#xL, dotNET_udf(min(s))#x, Java_udf(sum(earnings))#xL, Java_udf(min(s))#x] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] - +- Aggregate [year#x, course#x], 
[year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS dotNET_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[0] AS dotNET_udf(min(s))#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS Java_udf(sum(earnings))#xL, __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x[1] AS Java_udf(min(s))#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x, pivotfirst(course#x, CAST(udf(cast(min(s) as string)) AS INT)#x, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(min(s) as string)) AS INT) AS `CAST(udf(cast(min(s) as string)) AS INT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, cast(udf(cast(min(s#x) as string)) as int) AS CAST(udf(cast(min(s) as string)) AS INT)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -236,25 +229,24 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET#xL, Java#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] - +- 
Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT) AS `CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(sum((earnings#x * s#x)) as string)) as bigint) AS CAST(udf(cast(sum((earnings * s)) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -266,18 +258,17 @@ PIVOT ( FOR y IN (2012, 2013) ) -- !query analysis -Project [2012_s#xL, 2013_s#xL, 2012_a#x, 2013_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 
cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS 2012_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS 2013_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS 2012_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS 2013_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -289,18 +280,17 @@ PIVOT ( FOR y IN (2012 as firstYear, 2013 secondYear) ) -- !query analysis -Project [firstYear_s#xL, secondYear_s#xL, firstYear_a#x, secondYear_a#x, c#x] -+- Project [c#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x] - +- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] - +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) 
as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [__pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[0] AS firstYear_s#xL, __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x[1] AS secondYear_s#xL, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[0] AS firstYear_a#x, __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x[1] AS secondYear_a#x, c#x] ++- Aggregate [c#x], [c#x, pivotfirst(y#x, CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s AS `CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s`#x, pivotfirst(y#x, CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x, 2012, 2013, 0, 0) AS __pivot_CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a AS `CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a`#x] + +- Aggregate [c#x, y#x], [c#x, y#x, cast(udf(cast(sum(e#x) as string)) as bigint) AS CAST(udf(cast(sum(e) as string)) AS BIGINT) AS s#xL, cast(udf(cast(avg(e#x) as string)) as double) AS CAST(udf(cast(avg(e) as string)) AS DOUBLE) AS a#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- !query @@ -373,18 +363,17 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) -- !query analysis -Project [year#x, dotNET_udf(CEIL(udf(sum(earnings))))#xL, dotNET_a1#x, Java_udf(CEIL(udf(sum(earnings))))#xL, Java_a1#x] -+- Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] - +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as 
bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] - +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [year#x, course#x, earnings#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] -- 
!query @@ -421,25 +410,24 @@ PIVOT ( FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) -- !query analysis -Project [s#x, {dotNET, 2012}#xL, {Java, 2013}#xL] -+- Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] - +- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [s#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, 2012}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, 2013}#xL] ++- Aggregate [s#x], [s#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2012], [Java,2013], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [s#x, named_struct(course, course#x, year, year#x)], [s#x, named_struct(course, course#x, year, year#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -453,25 +441,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) -- !query analysis -Project [year#x, c1#xL, c2#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS 
BIGINT)`#x[1] AS c2#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, year#x, earnings#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias years - +- View (`years`, [y#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] - +- Project [y#x, s#x] - +- SubqueryAlias years - +- LocalRelation [y#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS c1#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS c2#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,2], [Java,1], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, year#x, earnings#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias years + +- View (`years`, [y#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(s#x as int) AS s#x] + +- Project [y#x, s#x] + +- SubqueryAlias years + +- LocalRelation [y#x, s#x] -- !query @@ -550,25 +537,24 @@ PIVOT ( FOR a IN (array(1, 1), array(2, 2)) ) -- !query analysis -Project [year#x, [1, 1]#xL, [2, 2]#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] - +- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- 
Project [earnings#x, year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS [1, 1]#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS [2, 2]#xL] ++- Aggregate [year#x], [year#x, pivotfirst(a#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,1], [2,2], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, a#x], [year#x, a#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -582,25 +568,24 @@ PIVOT ( FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) -- !query analysis -Project [year#x, {dotNET, [1, 1]}#xL, {Java, [2, 2]}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS 
course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, [1, 1]}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, [2, 2]}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,1]], [Java,[2,2]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, a, a#x)], [year#x, named_struct(course, course#x, a, a#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(year#x as string)) as int) AS year#x, a#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -614,25 +599,24 @@ PIVOT ( FOR s IN ((1, 'a'), (2, 'b')) ) -- !query analysis -Project [year#x, {1, a}#xL, {2, b}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View 
(`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {1, a}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {2, b}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(s#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [1,a], [2,b], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, s#x], [year#x, s#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -646,25 +630,24 @@ PIVOT ( FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) -- !query analysis -Project [year#x, {dotNET, {1, a}}#xL, {Java, {2, b}}#xL] -+- Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] - +- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] - +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, year#x, s#x] - +- Join Inner, (year#x = y#x) - :- SubqueryAlias coursesales - : +- View (`courseSales`, [course#x, year#x, earnings#x]) - : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - : +- Project [course#x, year#x, earnings#x] - : +- SubqueryAlias courseSales - : +- LocalRelation [course#x, year#x, earnings#x] - +- SubqueryAlias yearswithcomplextypes - +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) - +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] - +- Project [y#x, a#x, m#x, s#x] - +- SubqueryAlias yearsWithComplexTypes - +- 
LocalRelation [y#x, a#x, m#x, s#x] +Project [year#x, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[0] AS {dotNET, {1, a}}#xL, __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x[1] AS {Java, {2, b}}#xL] ++- Aggregate [year#x], [year#x, pivotfirst(__pivot_col#x, CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL, [dotNET,[1,a]], [Java,[2,b]], 0, 0) AS __pivot_CAST(udf(cast(sum(earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(earnings) as string)) AS BIGINT)`#x] + +- Aggregate [year#x, named_struct(course, course#x, s, s#x)], [year#x, named_struct(course, course#x, s, s#x) AS __pivot_col#x, cast(udf(cast(sum(earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, year#x, s#x] + +- Join Inner, (year#x = y#x) + :- SubqueryAlias coursesales + : +- View (`courseSales`, [course#x, year#x, earnings#x]) + : +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + : +- Project [course#x, year#x, earnings#x] + : +- SubqueryAlias courseSales + : +- LocalRelation [course#x, year#x, earnings#x] + +- SubqueryAlias yearswithcomplextypes + +- View (`yearsWithComplexTypes`, [y#x, a#x, m#x, s#x]) + +- Project [cast(y#x as int) AS y#x, cast(a#x as array) AS a#x, cast(m#x as map) AS m#x, cast(s#x as struct) AS s#x] + +- Project [y#x, a#x, m#x, s#x] + +- SubqueryAlias yearsWithComplexTypes + +- LocalRelation [y#x, a#x, m#x, s#x] -- !query @@ -720,15 +703,14 @@ PIVOT ( FOR Course IN ('dotNET', 'Java') ) -- !query analysis -Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, dotNET#xL, Java#xL] -+- Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] - +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] - +- SubqueryAlias __auto_generated_subquery_name - +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] - +- SubqueryAlias coursesales - +- View (`courseSales`, [course#x, year#x, earnings#x]) - +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] - +- Project [course#x, year#x, earnings#x] - +- SubqueryAlias courseSales - +- LocalRelation [course#x, year#x, earnings#x] +Project [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, 
__pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[0] AS dotNET#xL, __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x[1] AS Java#xL] ++- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, pivotfirst(Course#x, CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(sum(Earnings) as string)) AS BIGINT) AS `CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)`#x] + +- Aggregate [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x], [a#x, z#x, b#x, y#x, c#x, x#x, d#x, w#x, Course#x, cast(udf(cast(sum(Earnings#x) as string)) as bigint) AS CAST(udf(cast(sum(Earnings) as string)) AS BIGINT)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [course#x, earnings#x, cast(udf(cast(a as string)) as string) AS a#x, cast(udf(cast(z as string)) as string) AS z#x, cast(udf(cast(b as string)) as string) AS b#x, cast(udf(cast(y as string)) as string) AS y#x, cast(udf(cast(c as string)) as string) AS c#x, cast(udf(cast(x as string)) as string) AS x#x, cast(udf(cast(d as string)) as string) AS d#x, cast(udf(cast(w as string)) as string) AS w#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 5a74c4be107e..7a4db458f862 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -39,80 +39,76 @@ CreateViewCommand `nt2`, select * from values SELECT * FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project 
[k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -183,220 +179,209 @@ Sort [k#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as 
string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 left outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join LeftOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.* FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, v1#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- 
SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt1.k FROM nt1 left semi join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x] - +- Join LeftSemi, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join LeftSemi, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, v2#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- 
Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -446,120 +431,114 @@ Sort [key#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project 
[cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 right outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join RightOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join RightOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x] ++- Join 
FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 full outer join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 full outer join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -608,121 +587,115 @@ Sort [key#x ASC NULLS FIRST], true -- !query SELECT k, nt1.k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- 
SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, k#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x, k#x, k#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, k#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT * FROM nt1 full outer join nt2 using (k) -- !query analysis -Project [k#x, v1#x, v2#x] -+- Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] - +- Join FullOuter, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] +Project [coalesce(k#x, k#x) AS k#x, v1#x, v2#x] ++- Join FullOuter, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.*, nt2.* FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, v1#x, k#x, v2#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- 
Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT nt1.k, nt2.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query @@ -772,40 +745,38 @@ Sort [key#x ASC NULLS FIRST], true SELECT k, nt1.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query SELECT k, nt2.k FROM nt1 inner join nt2 using (k) -- !query analysis Project [k#x, k#x] -+- Project [k#x, v1#x, v2#x, k#x] - +- Join Inner, (k#x = k#x) - :- SubqueryAlias nt1 - : +- View (`nt1`, [k#x, v1#x]) - : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] - : +- Project [k#x, v1#x] - : +- SubqueryAlias nt1 - : +- LocalRelation [k#x, v1#x] - +- SubqueryAlias nt2 - +- View (`nt2`, [k#x, v2#x]) - +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] - +- Project [k#x, v2#x] - +- SubqueryAlias nt2 - +- LocalRelation [k#x, v2#x] ++- 
Join Inner, (k#x = k#x) + :- SubqueryAlias nt1 + : +- View (`nt1`, [k#x, v1#x]) + : +- Project [cast(k#x as string) AS k#x, cast(v1#x as int) AS v1#x] + : +- Project [k#x, v1#x] + : +- SubqueryAlias nt1 + : +- LocalRelation [k#x, v1#x] + +- SubqueryAlias nt2 + +- View (`nt2`, [k#x, v2#x]) + +- Project [cast(k#x as string) AS k#x, cast(v2#x as int) AS v2#x] + +- Project [k#x, v2#x] + +- SubqueryAlias nt2 + +- LocalRelation [k#x, v2#x] -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala index bda8c7f26082..09dd9eb95d84 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import org.scalatest.concurrent.TimeLimits import org.scalatest.time.SpanSugar._ +import org.apache.spark.sql.execution.{ProjectExec, UnaryExecNode} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.functions._ @@ -244,15 +245,26 @@ class DatasetCacheSuite extends QueryTest case i: InMemoryRelation => i.cacheBuilder.cachedPlan } assert(df1LimitInnerPlan.isDefined && df1LimitInnerPlan.get == df1InnerPlan) - - // Verify that df2's cache has been re-cached, with a new physical plan rid of dependency - // on df, since df2's cache had not been loaded before df.unpersist(). val df2Limit = df2.limit(2) val df2LimitInnerPlan = df2Limit.queryExecution.withCachedData.collectFirst { case i: InMemoryRelation => i.cacheBuilder.cachedPlan } + // The assertion below is incorrect in context of bug SPARK-47609. + // as df2 is derivable from df1 ( which is an InMemoryRelation) + + /* + // Verify that df2's cache has been re-cached, with a new physical plan rid of dependency + // on df, since df2's cache had not been loaded before df.unpersist(). assert(df2LimitInnerPlan.isDefined && !df2LimitInnerPlan.get.exists(_.isInstanceOf[InMemoryTableScanExec])) + */ + assert(df2LimitInnerPlan.isDefined) + val innerImr = df2LimitInnerPlan.get.collectFirst { + case imrExec: InMemoryTableScanExec => imrExec.relation + } + assert(innerImr.isDefined) + assert(innerImr.get.cacheBuilder.cachedPlan.asInstanceOf[UnaryExecNode]. + child.isInstanceOf[ProjectExec]) } test("SPARK-27739 Save stats from optimized plan") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala new file mode 100644 index 000000000000..036a102ba77d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectSuite.scala @@ -0,0 +1,366 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.analysis.EarlyCollapseProject +import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +class EarlyCollapseProjectSuite extends QueryTest + with SharedSparkSession with AdaptiveSparkPlanHelper { + import testImplicits._ + val useCaching: Boolean = false + + test("withColumns: check no new project addition for simple columns addition") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1", "newCol2"), Seq(col("a") + 1, col("b") + 2)), + (1, 2), (1, 1)) + } + + test("withColumns: check no new project addition if redefined alias is not used in" + + " new columns") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "a", $"b") + + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("b") + 2)), (1, 2), (1, 1)) + } + + test("withColumns: no new project addition if redefined alias is used in new columns - 1") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "a", $"b") + + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("a") + 2)), (1, 2), (1, 1)) + } + + test("withColumns: no new project addition if redefined alias is used in new columns - 2") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b"). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), (1, 2), (1, 1)) + } + + test("withColumnRenamed: remap of column should not result in new project if the source" + + " of remap is not used in other cols") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumnRenamed("a", "c"), (1, 1), (0, 0)) + } + + test("withColumnRenamed: remap of column should not result in new project if the source" + + " of remap is an attribute used in other cols") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumnRenamed("a", "d"), (1, 1), (0, 0)) + } + + + test("withColumnRenamed: remap of column should not result in new project if the remap" + + " is on an alias") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). 
+ select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d" ) + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnRenamed("d", "x"), + (1, 1), (0, 0)) + } + + test("withColumnRenamed: remap of column should not result in new project if the remap" + + " source an alias and that attribute is also projected as another attribute") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d"). + select($"c", $"a", $"b", $"d", $"d" as "k") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnRenamed("d", "x"), + (1, 1), (0, 0)) + } + + + test("withColumnRenamed: test multi column remap") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumnsRenamed(Map("d" -> "x", "c" -> "k", "a" -> "u")), (1, 1), (0, 0)) + } + + test("withColumns: test multi column addition") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns( + Seq("newCol1", "newCol2", "newCol3", "newCol4"), + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))), (1, 2), (1, 1)) + } + + test("mix of column addition, rename and dropping") { + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select($"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", + $"a" as "renameCola", $"c" * $"d" as "c", $"a"), (1, 2), (1, 1)) + } + + + test("mix of column addition, rename and dropping - 1") { + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select($"c" * $"d" as "c", $"a" + $"d" as "newCol1", $"b" * $"a" as "newCol2", + $"a" as "renameCola", $"a"), (1, 2), (1, 1)) + } + + test("mix of column addition, rename and dropping - 2") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select($"d", $"b" as "renameB", $"a" as "renameA", $"a" as "renameColA"), + (1, 2), (1, 1)) + } + + + test("mix of column addition, rename and dropping - 3") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select($"d" * $"a" as "d", $"b" as "renameB", $"a" * $"d" as "renameA", + $"a" as "renameColA"), (1, 2), (1, 1)) + } + + + test("mix of column addition, rename and dropping - 4") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). 
+ select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.select($"c"), + (1, 2), (0, 1)) + } + + + test("mix of column addition, rename and dropping - 5") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.select($"d" * 7 as "a"), + (1, 2), (0, 1)) + } + + + test("mix of column addition, rename and dropping - 6") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select($"c", $"a", $"b", $"c" * $"a" * $"b" as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select($"d" * 7 as "a", $"d" * 7 as "b", $"b" + $"a" as "e"), (1, 2), (0, 1)) + } + + test("mix of column addition, rename and dropping - 7") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" + 5 as "b"). + select($"a" + $"b" as "c", $"a", $"b").select( lit(9) as "e", $"c", lit(11) as "a", $"b", + $"c" * $"a" * $"b" as "d") + checkProjectCollapseCacheUseAndInvalidation( + baseDfCreator, + df => df.select($"a" as "a1", lit(7) as "d1", $"b" as "b1", $"c" * $"a" as "c", + lit(13) as "f"), (1, 2), (0, 1)) + } + + test("new columns added do not result in new project -1") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumns( + Seq("newCol1", "newCol2", "newCol3", "newCol4"), + Seq(col("a") + 2, col("b") + 7, col("a") + col("b"), col("a") + col("d"))), + (1, 2), (1, 1)) + } + + test("new columns added do not result in new project -2") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b"). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.withColumns(Seq("newCol1"), Seq(col("c") + 2 + col("a") * col("e"))), + (1, 2), (1, 1)) + } + + test("new columns added do not result in new project, with positions changed") { + val baseDfCreator = () => spark.range(20).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b"). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, + df => df.select( $"e", $"a", $"c" + 2 + $"a" * $"e" as "newCol", $"c", $"d", $"b"), + (1, 2), (1, 1)) + } + + + test("renamed columns do not result in new project") { + val baseDfCreator = () => spark.range(10).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").select($"c", $"a", $"b", $"c" + 7 as "d") + + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnsRenamed( + Map("c" -> "c1", "a" -> "a1", "b" -> "b1", "d" -> "d1")), (1, 1), (0, 0)) + } + + test("early collapse of filter chain with project - 1") { + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b") + + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.filter($"a" > 4). + filter($"c" * $"b" < 60). 
+ select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"), + (1, 2), (0, 1)) + } + + test("early collapse of filter chain with project - 2") { + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4).filter($"c" * $"b" < 60) + + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"), + (1, 2), (0, 1)) + } + + test("resurrection of intermediate dropped cols when used in filter") { + val baseDfCreator = () => spark.range(100).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"b").select($"c", $"b", $"c" + 7 as "d") + // A dropped column would result in a new project being added on top of filter + // so we have to take into account of that extra project added while checking + // assertion of init node size and optimized df nodes size + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnsRenamed( + Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).filter($"a" > 5), (1, 2), (0, 1)) + } + + test("resurrection of right renamed intermediate dropped cols when used in filter") { + val baseDfCreator = () => spark.range(100).select($"id" + 7 as "a", $"id" as "b"). + select($"a" + 1 as "c", $"b", $"a" * $"b" as "a").select($"c", $"b", $"c" + 7 as "d") + // A dropped column would result in a new project being added on top of filter + // so we have to take into account of that extra project added while checking + // assertion of init node size and optimized df nodes size + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.withColumnsRenamed( + Map("c" -> "c1", "b" -> "b1", "d" -> "d1")).select($"c1", $"d1").filter($"a" > 25), + (1, 2), (0, 1)) + } + + protected def checkProjectCollapseCacheUseAndInvalidation( + baseDfCreator: () => DataFrame, + testExec: DataFrame => DataFrame, + baseAndDerivedIMRsOnCache: (Int, Int), + baseAndDerivedIMRsOnBaseInvalidation: (Int, Int)): Unit = { + // now check if the results of optimized dataframe and completely unoptimized dataframe are + // same + val fullyUnoptBase = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + baseDfCreator() + } + + val fullyUnoptTest = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + testExec(baseDfCreator()) + } + + val baseDfRows = fullyUnoptBase.collect() + val testDfRows = fullyUnoptTest.collect() + + val baseDf = baseDfCreator() + if (useCaching) { + baseDf.cache() + assertCacheDependency(baseDfCreator(), 1) + assertCacheDependency(testExec(baseDfCreator()), 1) + baseDfCreator().unpersist(true) + assertCacheDependency(baseDfCreator(), 0) + assertCacheDependency(testExec(baseDfCreator()), 0) + baseDfCreator().cache() + } + val initNodes = collectNodes(baseDf) + val (newDfOpt, newDfUnopt) = getComparableDataFrames(baseDf, testExec) + val optDfNodes = collectNodes(newDfOpt) + val nonOptDfNodes = collectNodes(newDfUnopt) + val foundFilterNodes = optDfNodes.exists(_.isInstanceOf[Filter]) + if (!foundFilterNodes) { + assert(initNodes.size === optDfNodes.size) + } + assert(nonOptDfNodes.size > optDfNodes.size) + checkAnswer(newDfOpt, newDfUnopt) + if (useCaching) { + assert(newDfOpt.queryExecution.optimizedPlan.collectLeaves().head. 
+ isInstanceOf[InMemoryRelation]) + } + + assert(collectNodes(fullyUnoptTest).size >= nonOptDfNodes.size) + checkAnswer(newDfOpt, fullyUnoptTest) + + if (useCaching) { + // first unpersist both dataframes + baseDf.unpersist(true) + newDfOpt.unpersist(true) + baseDf.cache() + newDfOpt.cache() + assertCacheDependency(baseDfCreator(), baseAndDerivedIMRsOnCache._1) + assertCacheDependency(testExec(baseDfCreator()), baseAndDerivedIMRsOnCache._2) + checkAnswer(baseDfCreator(), baseDfRows) + checkAnswer(testExec(baseDfCreator()), testDfRows) + baseDf.unpersist(true) + newDfOpt.unpersist(true) + baseDfCreator().cache() + testExec(baseDfCreator()).cache() + baseDfCreator().unpersist(true) + assertCacheDependency(baseDfCreator(), baseAndDerivedIMRsOnBaseInvalidation._1) + assertCacheDependency(testExec(baseDfCreator()), baseAndDerivedIMRsOnBaseInvalidation._2) + checkAnswer(baseDfCreator(), baseDfRows) + checkAnswer(testExec(baseDfCreator()), testDfRows) + // recache base df so that if existing tests want to continue should work fine + newDfOpt.unpersist(true) + baseDfCreator().cache() + } + } + + private def getComparableDataFrames( + baseDf: DataFrame, + transformation: DataFrame => DataFrame): (DataFrame, DataFrame) = { + // first obtain optimized transformation which avoids adding new project + val newDfOpt = transformation(baseDf) + // then obtain optimized transformation which adds new project + + val newDfUnopt = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + transformation(baseDf) + } + (newDfOpt, newDfUnopt) + } + + private def collectNodes(df: DataFrame): Seq[LogicalPlan] = df.logicalPlan.collect { + case l => l + } + + def assertCacheDependency(df: DataFrame, numOfCachesExpected: Int): Unit = { + val cachedPlans = df.queryExecution.withCachedData.collect { + case i: InMemoryRelation => i.cacheBuilder.cachedPlan + } + val totalIMRs = cachedPlans.size + cachedPlans.map(ime => recurse(ime)).sum + assert(totalIMRs == numOfCachesExpected) + } + + private def recurse(sparkPlan: SparkPlan): Int = { + val imrs = sparkPlan.collect { + case i: InMemoryTableScanExec => i + } + imrs.size + imrs.map(ime => recurse(ime.relation.cacheBuilder.cachedPlan)).sum + } +} + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala new file mode 100644 index 000000000000..20165eb121f0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/EarlyCollapseProjectWithCachingSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.sql.execution.analysis.EarlyCollapseProject +import org.apache.spark.sql.execution.columnar.InMemoryRelation +import org.apache.spark.sql.internal.SQLConf + +class EarlyCollapseProjectWithCachingSuite extends EarlyCollapseProjectSuite { + import testImplicits._ + override val useCaching: Boolean = true + + test("check for nested InMemoryRelations") { + val baseDfCreator = () => spark.range(1000).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4).filter($"c" * $"b" < 60) + + checkProjectCollapseCacheUseAndInvalidation(baseDfCreator, df => df.filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"), + (1, 2), (0, 1)) + + // there is already a cached base Df + val df1 = baseDfCreator().filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e") + df1.cache() + + val df2 = baseDfCreator().filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"). + select($"c" * $"a" as "c", $"c" * $"b" as "a", $"e").filter($"c" > 73). + filter($"d" < 300) + val rows = df2.collect() + assert(rows.length > 0) + // there should be 2 nested In Memory Relations + val optimizedPlan = df2.queryExecution.optimizedPlan + val leaf1 = optimizedPlan.collectLeaves().head + assert(leaf1.isInstanceOf[InMemoryRelation]) + val imr1 = leaf1.asInstanceOf[InMemoryRelation] + val leaf2 = imr1.queryExecution.optimizedPlan.collectLeaves().head + assert(leaf2.isInstanceOf[InMemoryRelation]) + df1.unpersist() + baseDfCreator().unpersist() + val fullyUnopt = withSQLConf( + SQLConf.EXCLUDE_POST_ANALYSIS_RULES.key -> EarlyCollapseProject.ruleName) { + baseDfCreator().filter($"b" < 100). + select($"c" + $"a" as "c", $"a" + 3 as "a", $"b", $"c" + 7 as "d", $"a" - $"b" as "e"). + select($"c" * $"a" as "c", $"c" * $"b" as "a", $"e").filter($"c" > 73). + filter($"d" < 300) + } + checkAnswer(fullyUnopt, rows) + } + + test("check cached plan invalidation when subplan is uncached") { + val baseDf = spark.range(1000).select($"id" as "a", $"id" as "b"). + select($"a" + 1 as "c", $"a", $"b").filter($"a" > 4) + val df1 = baseDf.withColumn("d", $"a" + 1 + $"b") + baseDf.cache() + // Add df1 to the CacheManager; the buffer is currently empty. 
+ df1.cache() + assertCacheDependency(df1, 2) + // removal of InMemoryRelation of base Df should result in the removal of dependency of df1 + baseDf.unpersist(blocking = true) + assertCacheDependency(df1.limit(1000), 1) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index f5ba655e3e85..df4f443b0e22 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -236,7 +236,8 @@ abstract class QueryTest extends PlanTest { def assertEmptyMissingInput(query: Dataset[_]): Unit = { assert(query.queryExecution.analyzed.missingInput.isEmpty, s"The analyzed logical plan has missing inputs:\n${query.queryExecution.analyzed}") - assert(query.queryExecution.optimizedPlan.missingInput.isEmpty, + assert(query.queryExecution.optimizedPlan.children.isEmpty || + query.queryExecution.optimizedPlan.missingInput.isEmpty, s"The optimized logical plan has missing inputs:\n${query.queryExecution.optimizedPlan}") assert(query.queryExecution.executedPlan.missingInput.isEmpty, s"The physical plan has missing inputs:\n${query.queryExecution.executedPlan}") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala index 506b44741ab4..d872fdac5cfb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.execution.CacheManager import org.apache.spark.storage.StorageLevel /** @@ -73,7 +74,8 @@ trait AlterTableRenameSuiteBase extends QueryTest with DDLCommandTestUtils { def getStorageLevel(tableName: String): StorageLevel = { val table = spark.table(tableName) val cachedData = spark.sharedState.cacheManager.lookupCachedData(table).get - cachedData.cachedRepresentation.cacheBuilder.storageLevel + cachedData.cachedRepresentation.fold(CacheManager.inMemoryRelationExtractor, identity). 
+ cacheBuilder.storageLevel } sql(s"CREATE TABLE $src (c0 INT) $defaultUsing") sql(s"INSERT INTO $src SELECT 0") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 979ff1e24ef5..30c7c89a3701 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.SparkPlanner import org.apache.spark.sql.execution.aggregate.ResolveEncodersInScalaAgg -import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin +import org.apache.spark.sql.execution.analysis.{DetectAmbiguousSelfJoin, EarlyCollapseProject} import org.apache.spark.sql.execution.command.CommandCheck import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck @@ -108,6 +108,9 @@ class HiveSessionStateBuilder( ReplaceCharWithVarchar +: customPostHocResolutionRules + override val postAnalysisEarlyOptimizationRules: Seq[Rule[LogicalPlan]] = + EarlyCollapseProject +: Nil + override val extendedCheckRules: Seq[LogicalPlan => Unit] = PreWriteCheck +: PreReadCheck +: