-
Notifications
You must be signed in to change notification settings - Fork 29k
[SQL] SPARK-6489: Optimize lateral view with explode to not read unnecessary columns. #5358
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
1b29835
376d332
644f688
9e7aaec
8909a5d
6014acc
54abc3a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -171,8 +171,23 @@ object ColumnPruning extends Rule[LogicalPlan] { | |
|
|
||
| Project(substitutedProjection, child) | ||
|
|
||
| case gen@Generate(generator: Explode, isJoin, isOuter, alias, child) => | ||
| val allReferences = gen.references ++ gen.parentReferences | ||
| val pruneProject = prunedChild(child, allReferences) | ||
| Generate(generator, isJoin, isOuter, alias, pruneProject) | ||
|
|
||
| // Eliminate no-op Projects | ||
| case Project(projectList, child) if child.output == projectList => child | ||
|
|
||
| case plan => | ||
| plan.children.foreach { c => | ||
| c match { | ||
| case gen@Generate(generator: Explode, isJoin, isOuter, alias, child) => | ||
| gen.parentReferences = plan.references; | ||
| case _ => // nothing | ||
| } | ||
| } | ||
| plan | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change is no longer needed. |
||
| } | ||
|
|
||
| /** Applies a projection only when the child is producing unnecessary attributes */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,6 +55,8 @@ case class Generate( | |
| child: LogicalPlan) | ||
| extends UnaryNode { | ||
|
|
||
| var parentReferences:AttributeSet = AttributeSet(Seq.empty) | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This mutable state is no longer needed. |
||
| protected def generatorOutput: Seq[Attribute] = { | ||
| val output = alias | ||
| .map(a => generator.output.map(_.withQualifiers(a :: Nil))) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| A, 20, 10:12:19 | ||
| B, 25, 7:8:4 | ||
| C, 19, 12:4:232 | ||
| D, 73, 243:53:7835 | ||
| E, 88, 1345:23:532532:353 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,6 +87,20 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter { | |
| Seq("key"), | ||
| Seq.empty) | ||
|
|
||
| createPruningTest("Column pruning - explode with aggregate", | ||
| "SELECT name, sum(d) AS sumd FROM person LATERAL VIEW explode(data) d AS d GROUP BY name", | ||
| Seq("name", "sumd"), | ||
| Seq("data","name"), | ||
| Seq.empty) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we need to go all the way to Hive to create an integration test. Instead, create some unit tests in |
||
|
|
||
| createPruningTest("Column pruning - outer explode with limit", | ||
| "SELECT name FROM person LATERAL VIEW OUTER explode(data) outd AS d limit 3", | ||
| Seq("name"), | ||
| Seq("data","name"), | ||
| Seq.empty) | ||
|
|
||
|
|
||
|
|
||
| // Partition pruning tests | ||
|
|
||
| createPruningTest("Partition pruning - non-partitioned, non-trivial project", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is that OK if we just do like
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then we don't need other code change except the unit test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, thanks :)