-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-30651][SQL] Add detailed information for Aggregate operators in EXPLAIN FORMATTED #27368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
2b48f4b
4ff35f9
426b953
65aea2a
2cc5daf
5e5d481
70cb2df
5b91b19
dd0988a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,7 @@ | |
|
|
||
| package org.apache.spark.sql.execution.aggregate | ||
|
|
||
| import org.apache.spark.sql.catalyst.expressions.NamedExpression | ||
| import org.apache.spark.sql.catalyst.expressions.{Attribute, NamedExpression} | ||
| import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression | ||
| import org.apache.spark.sql.execution.{ExplainUtils, UnaryExecNode} | ||
|
|
||
|
|
@@ -27,24 +27,21 @@ import org.apache.spark.sql.execution.{ExplainUtils, UnaryExecNode} | |
| abstract class BaseAggregateExec extends UnaryExecNode { | ||
| val groupingExpressions: Seq[NamedExpression] | ||
| val aggregateExpressions: Seq[AggregateExpression] | ||
| val aggregateAttributes: Seq[Attribute] | ||
| val resultExpressions: Seq[NamedExpression] | ||
|
||
|
|
||
| protected val aggregateBufferAttributes = { | ||
| aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes) | ||
| } | ||
|
|
||
| override def verboseStringWithOperatorId(): String = { | ||
| val inputString = child.output.mkString("[", ", ", "]") | ||
| val keyString = groupingExpressions.mkString("[", ", ", "]") | ||
| val functionString = aggregateExpressions.mkString("[", ", ", "]") | ||
| val funcBufferAttrString = aggregateBufferAttributes.mkString("[", ", ", "]") | ||
| val aggregateAttributeString = aggregateAttributes.mkString("[", ", ", "]") | ||
| val resultString = resultExpressions.mkString("[", ", ", "]") | ||
| s""" | ||
| |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)} | ||
| |Input: $inputString | ||
| |Keys: $keyString | ||
| |Functions: $functionString | ||
| |FuncBufferAttrs: $funcBufferAttrString | ||
| |Aggregate Attributes: $aggregateAttributeString | ||
| |Results: $resultString | ||
| """.stripMargin | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -86,7 +86,7 @@ Input : [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [key#x] | ||
| Functions: [partial_max(val#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max#x] | ||
| Results: [key#x, max#x] | ||
|
|
||
| (6) Exchange | ||
|
|
@@ -96,7 +96,7 @@ Input: [key#x, max#x] | |
| Input: [key#x, max#x] | ||
| Keys: [key#x] | ||
| Functions: [max(val#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max(val#x)#x] | ||
| Results: [key#x, max(val#x)#x AS max(val)#x] | ||
cloud-fan marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| (8) Exchange | ||
|
|
@@ -150,7 +150,7 @@ Input : [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [key#x] | ||
| Functions: [partial_max(val#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max#x] | ||
| Results: [key#x, max#x] | ||
|
|
||
| (6) Exchange | ||
|
|
@@ -160,7 +160,7 @@ Input: [key#x, max#x] | |
| Input: [key#x, max#x] | ||
| Keys: [key#x] | ||
| Functions: [max(val#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max(val#x)#x] | ||
| Results: [key#x, max(val#x)#x AS max(val)#x, max(val#x)#x AS max(val#x)#x] | ||
|
|
||
| (8) Filter [codegen id : 2] | ||
|
|
@@ -237,7 +237,7 @@ Input : [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [key#x, val#x] | ||
| Functions: [] | ||
| FuncBufferAttrs: [] | ||
| Aggregate Attributes: [] | ||
| Results: [key#x, val#x] | ||
|
|
||
| (11) Exchange | ||
|
|
@@ -247,7 +247,7 @@ Input: [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [key#x, val#x] | ||
| Functions: [] | ||
| FuncBufferAttrs: [] | ||
| Aggregate Attributes: [] | ||
| Results: [key#x, val#x] | ||
|
|
||
|
|
||
|
|
@@ -447,7 +447,7 @@ Input : [key#x, val#x] | |
| Input: [key#x] | ||
| Keys: [] | ||
| Functions: [partial_max(key#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max#x] | ||
| Results: [max#x] | ||
|
|
||
| (10) Exchange | ||
|
|
@@ -457,7 +457,7 @@ Input: [max#x] | |
| Input: [max#x] | ||
| Keys: [] | ||
| Functions: [max(key#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max(key#x)#x] | ||
| Results: [max(key#x)#x AS max(key)#x] | ||
|
|
||
| Subquery:2 Hosting operator id = 7 Hosting Expression = Subquery scalar-subquery#x, [id=#x] | ||
|
|
@@ -492,7 +492,7 @@ Input : [key#x, val#x] | |
| Input: [key#x] | ||
| Keys: [] | ||
| Functions: [partial_max(key#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max#x] | ||
| Results: [max#x] | ||
|
|
||
| (17) Exchange | ||
|
|
@@ -502,7 +502,7 @@ Input: [max#x] | |
| Input: [max#x] | ||
| Keys: [] | ||
| Functions: [max(key#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max(key#x)#x] | ||
| Results: [max(key#x)#x AS max(key)#x] | ||
|
|
||
|
|
||
|
|
@@ -573,7 +573,7 @@ Input : [key#x, val#x] | |
| Input: [key#x] | ||
| Keys: [] | ||
| Functions: [partial_max(key#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max#x] | ||
| Results: [max#x] | ||
cloud-fan marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| (9) Exchange | ||
|
|
@@ -583,7 +583,7 @@ Input: [max#x] | |
| Input: [max#x] | ||
| Keys: [] | ||
| Functions: [max(key#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max(key#x)#x] | ||
| Results: [max(key#x)#x AS max(key)#x] | ||
|
|
||
| Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#x, [id=#x] | ||
|
|
@@ -618,7 +618,7 @@ Input : [key#x, val#x] | |
| Input: [key#x] | ||
| Keys: [] | ||
| Functions: [partial_avg(cast(key#x as bigint))] | ||
| FuncBufferAttrs: [sum#x, count#xL] | ||
| Aggregate Attributes: [sum#x, count#xL] | ||
| Results: [sum#x, count#xL] | ||
|
|
||
| (16) Exchange | ||
|
|
@@ -628,7 +628,7 @@ Input: [sum#x, count#xL] | |
| Input: [sum#x, count#xL] | ||
| Keys: [] | ||
| Functions: [avg(cast(key#x as bigint))] | ||
| FuncBufferAttrs: [sum#x, count#xL] | ||
| Aggregate Attributes: [avg(cast(key#x as bigint))#x] | ||
| Results: [avg(cast(key#x as bigint))#x AS avg(key)#x] | ||
|
|
||
|
|
||
|
|
@@ -681,7 +681,7 @@ Input: [key#x] | |
| Input: [key#x] | ||
| Keys: [] | ||
| Functions: [partial_avg(cast(key#x as bigint))] | ||
| FuncBufferAttrs: [sum#x, count#xL] | ||
| Aggregate Attributes: [sum#x, count#xL] | ||
| Results: [sum#x, count#xL] | ||
|
|
||
| (7) Exchange | ||
|
|
@@ -691,7 +691,7 @@ Input: [sum#x, count#xL] | |
| Input: [sum#x, count#xL] | ||
| Keys: [] | ||
| Functions: [avg(cast(key#x as bigint))] | ||
| FuncBufferAttrs: [sum#x, count#xL] | ||
| Aggregate Attributes: [avg(cast(key#x as bigint))#x] | ||
| Results: [avg(cast(key#x as bigint))#x AS avg(key)#x] | ||
|
|
||
| Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#x, [id=#x] | ||
|
|
@@ -814,7 +814,7 @@ Input : [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [key#x] | ||
| Functions: [partial_max(val#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max#x] | ||
| Results: [key#x, max#x] | ||
|
|
||
| (6) Exchange | ||
|
|
@@ -824,7 +824,7 @@ Input: [key#x, max#x] | |
| Input: [key#x, max#x] | ||
| Keys: [key#x] | ||
| Functions: [max(val#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max(val#x)#x] | ||
| Results: [key#x, max(val#x)#x AS max(val)#x] | ||
|
|
||
| (8) ReusedExchange [Reuses operator id: 6] | ||
|
|
@@ -834,7 +834,7 @@ Output : ArrayBuffer(key#x, max#x) | |
| Input: [key#x, max#x] | ||
| Keys: [key#x] | ||
| Functions: [max(val#x)] | ||
| FuncBufferAttrs: [max#x] | ||
| Aggregate Attributes: [max(val#x)#x] | ||
| Results: [key#x, max(val#x)#x AS max(val)#x] | ||
|
|
||
| (10) BroadcastExchange | ||
|
|
@@ -900,7 +900,7 @@ Input: [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [] | ||
| Functions: [partial_count(val#x), partial_sum(cast(key#x as bigint)), partial_count(key#x) FILTER (WHERE (val#x > 1))] | ||
| FuncBufferAttrs: [count#xL, sum#xL, count#xL] | ||
| Aggregate Attributes: [count#xL, sum#xL, count#xL] | ||
| Results: [count#xL, sum#xL, count#xL] | ||
|
|
||
| (4) Exchange | ||
|
|
@@ -910,7 +910,7 @@ Input: [count#xL, sum#xL, count#xL] | |
| Input: [count#xL, sum#xL, count#xL] | ||
| Keys: [] | ||
| Functions: [count(val#x), sum(cast(key#x as bigint)), count(key#x)] | ||
| FuncBufferAttrs: [count#xL, sum#xL, count#xL] | ||
| Aggregate Attributes: [count(val#x)#xL, sum(cast(key#x as bigint))#xL, count(key#x)#xL] | ||
| Results: [(count(val#x)#xL + sum(cast(key#x as bigint))#xL) AS TOTAL#xL, count(key#x)#xL AS count(key) FILTER (WHERE (val > 1))#xL] | ||
|
|
||
|
|
||
|
|
@@ -943,7 +943,7 @@ Input: [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [key#x] | ||
| Functions: [partial_collect_set(val#x, 0, 0)] | ||
| FuncBufferAttrs: [buf#x] | ||
| Aggregate Attributes: [buf#x] | ||
| Results: [key#x, buf#x] | ||
|
|
||
| (4) Exchange | ||
|
|
@@ -953,7 +953,7 @@ Input: [key#x, buf#x] | |
| Input: [key#x, buf#x] | ||
| Keys: [key#x] | ||
| Functions: [collect_set(val#x, 0, 0)] | ||
| FuncBufferAttrs: [buf#x] | ||
| Aggregate Attributes: [collect_set(val#x, 0, 0)#x] | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Results: [key#x, sort_array(collect_set(val#x, 0, 0)#x, true)[0] AS sort_array(collect_set(val), true)[0]#x] | ||
|
||
|
|
||
|
|
||
|
|
@@ -991,7 +991,7 @@ Input: [key#x, val#x] | |
| Input: [key#x, val#x] | ||
| Keys: [key#x] | ||
| Functions: [partial_min(val#x)] | ||
| FuncBufferAttrs: [min#x] | ||
| Aggregate Attributes: [min#x] | ||
| Results: [key#x, min#x] | ||
|
|
||
| (5) Exchange | ||
|
|
@@ -1004,7 +1004,7 @@ Input: [key#x, min#x] | |
| Input: [key#x, min#x] | ||
| Keys: [key#x] | ||
| Functions: [min(val#x)] | ||
| FuncBufferAttrs: [min#x] | ||
| Aggregate Attributes: [min(val#x)#x] | ||
| Results: [key#x, min(val#x)#x AS min(val)#x] | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall we make it `trait`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, changed to `trait` to make it consistent with other operators, e.g. `HashJoin`, `BaseLimitExec`.