Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
more optimization
  • Loading branch information
cloud-fan committed Aug 19, 2015
commit 6e4c6c8a22276231a7a5ea4cb54f71ddaf22a3a9
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,17 @@ object ColumnPruning extends Rule[LogicalPlan] {
case g: Generate if !g.join && (g.child.outputSet -- g.references).nonEmpty =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also handle the join case when there is a project on top of the generate (i.e. push the project through the generate)?

g.copy(child = Project(g.references.toSeq, g.child))

case p @ Project(_, g: Generate) if g.join && p.references.subsetOf(g.generatedSet) =>
p.copy(child = g.copy(join = false))

case p @ Project(projectList, g: Generate) if g.join =>
val neededChildOutput = p.references -- g.generatorOutput ++ g.references
if (neededChildOutput == g.child.outputSet) {
p
} else {
Project(projectList, g.copy(child = Project(neededChildOutput.toSeq, g.child)))
}

case p @ Project(projectList, a @ Aggregate(groupingExpressions, aggregateExpressions, child))
if (a.outputSet -- p.references).nonEmpty =>
Project(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,53 @@ class ColumnPruningSuite extends PlanTest {
ColumnPruning) :: Nil
}

// Checks that the optimizer inserts a Project keeping only 'b beneath a Generate that
// explodes 'b; the relation's other column 'a does not appear in the expected child.
// NOTE(review): the consecutive near-duplicate lines below (two `test(` headers, two
// `val query` definitions, two `Generate(` lines under `correctAnswer`) look like the
// before/after sides of a diff rendered without +/- markers; the second of each pair
// names the generated column 's instead of 'b. Confirm which side is live.
test("Column pruning for Generate") {
test("Column pruning for Generate when Generate.join = false") {
val input = LocalRelation('a.int, 'b.array(StringType))

// Second constructor argument is `false`; the newer test title identifies it as Generate.join.
val query = Generate(Explode('b), false, false, None, 'b.string :: Nil, input).analyze
val query = Generate(Explode('b), false, false, None, 's.string :: Nil, input).analyze
val optimized = Optimize.execute(query)

// Expected plan: the same Generate, but reading from a projection of 'b only.
val correctAnswer =
Generate(Explode('b), false, false, None, 'b.string :: Nil,
Generate(Explode('b), false, false, None, 's.string :: Nil,
Project('b.attr :: Nil, input)).analyze

comparePlans(optimized, correctAnswer)
}

test("Column pruning for Generate when Generate.join = true") {
  // The relation carries 'b, which nothing above it reads.
  val relation = LocalRelation('a.int, 'b.int, 'c.array(StringType))

  // Select 'a and the generated column 's on top of a Generate over 'c
  // (second argument `true`, i.e. join = true per the test name).
  val unoptimizedPlan =
    Project(Seq('a, 's),
      Generate(Explode('c), true, false, None, 's.string :: Nil, relation)).analyze
  val actualPlan = Optimize.execute(unoptimizedPlan)

  // Expected: a Project keeping just 'a and 'c is placed beneath the Generate.
  val prunedRelation = Project(Seq('a, 'c), relation)
  val expectedPlan =
    Project(Seq('a, 's),
      Generate(Explode('c), true, false, None, 's.string :: Nil, prunedRelation)).analyze

  comparePlans(actualPlan, expectedPlan)
}

// The projection reads only the generated column 's, and the relation has the single
// column 'b that the generator consumes. The expected plan differs from the query only
// in Generate's second argument (true -> false), which the test name calls Generate.join.
test("Turn Generate.join to false if possible") {
val input = LocalRelation('b.array(StringType))

val query =
Project(('s + 1).as("s+1") :: Nil,
Generate(Explode('b), true, false, None, 's.string :: Nil,
input)).analyze
val optimized = Optimize.execute(query)

// Same shape as `query`, with the second Generate argument flipped to false.
val correctAnswer =
Project(('s + 1).as("s+1") :: Nil,
Generate(Explode('b), false, false, None, 's.string :: Nil,
input)).analyze

comparePlans(optimized, correctAnswer)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can also have a test for val input = LocalRelation('a.int, 'b.array(StringType)). Basically, we test turning join to false and then we add a project as the child of Generate.

}

// TODO: add more tests for column pruning
}