Skip to content
Closed
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
34abc22
Support wholestage codegen for reducing expression codes to prevent 6…
viirya Nov 24, 2017
e0d111e
Merge remote-tracking branch 'upstream/master' into reduce-expr-code-…
viirya Nov 25, 2017
65d07d5
Assert the added test is under wholestage codegen.
viirya Nov 25, 2017
9f848be
Put input rows and evaluated columns referred by deferred expressions…
viirya Nov 27, 2017
57b1add
Revert unnecessary changes.
viirya Nov 27, 2017
d051f9e
Fix subexpression isNull for non nullable case. Fix columnar batch sc…
viirya Nov 28, 2017
6368702
Let rowidx as global variable instead of early evaluation of column o…
viirya Nov 28, 2017
8c7f749
Fix the problematic case.
viirya Nov 28, 2017
7f00515
Fix duplicate parameters.
viirya Nov 29, 2017
777eb7a
Address comments.
viirya Nov 30, 2017
7230997
Polish the patch.
viirya Nov 30, 2017
fd87e9b
Add test for new APIs.
viirya Nov 30, 2017
57a9fb7
Generate function parameters if needed.
viirya Nov 30, 2017
0d358d6
Address comments.
viirya Dec 1, 2017
aa3db2e
Address comments.
viirya Dec 1, 2017
429afba
Rename variable.
viirya Dec 4, 2017
48add65
Address comments.
viirya Dec 5, 2017
9443011
Address comments.
viirya Dec 8, 2017
2f4014f
Address comments again.
viirya Dec 11, 2017
655917c
Remove redundant optimization.
viirya Dec 12, 2017
c083a79
Use utility method.
viirya Dec 12, 2017
1251dfa
Address comments.
viirya Dec 12, 2017
c4f15f7
Move isLiteral and isEvaluated into ExpressionCodegen.
viirya Dec 12, 2017
f35974e
Remove useless isLiteral and isEvaluted. Add one more test.
viirya Dec 12, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ case class BoundReference(ordinal: Int, dataType: DataType, nullable: Boolean)
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
if (ctx.currentVars != null && ctx.currentVars(ordinal) != null) {
val oev = ctx.currentVars(ordinal)
ev.isNull = oev.isNull
if (nullable) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, right.

ev.isNull = oev.isNull
} else {
ev.isNull = "false"
}
ev.value = oev.value
ev.copy(code = oev.code)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ abstract class Expression extends TreeNode[Expression] {
val isNull = ctx.freshName("isNull")
val value = ctx.freshName("value")
val eval = doGenCode(ctx, ExprCode("", isNull, value))

// Records current input row and variables of this expression.
eval.inputRow = ctx.INPUT_ROW
eval.inputVars = findInputVars(ctx, eval)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shall we do one more thing here?

eval.isNull = if (this.nullable) eval.isNull else "false"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually we can be more aggresive and do val isNull = if (this.nullable) eval.isNull else "false", and see if there is any compile errors.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. Let me try it in next commit.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The aggressive version (val isNull = if (this.nullable) ctx.freshName("isNull") else "false") fails compilation when testing locally.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eval.isNull = if (this.nullable) eval.isNull else "false" seems ok.


reduceCodeSize(ctx, eval)
if (eval.code.nonEmpty) {
// Add `this` in the comment.
Expand All @@ -115,9 +120,28 @@ abstract class Expression extends TreeNode[Expression] {
}
}

/**
* Returns the input variables to this expression.
*/
private def findInputVars(ctx: CodegenContext, eval: ExprCode): Seq[ExprInputVar] = {
if (ctx.currentVars != null) {
this.collect {
case b @ BoundReference(ordinal, _, _) if ctx.currentVars(ordinal) != null =>
ExprInputVar(b, exprCode = ctx.currentVars(ordinal))
}
} else {
Seq.empty
}
}

/**
* In order to prevent 64kb compile error, reducing the size of generated codes by
* separating it into a function if the size exceeds a threshold.
*/
private def reduceCodeSize(ctx: CodegenContext, eval: ExprCode): Unit = {
// TODO: support whole stage codegen too
if (eval.code.trim.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) {
lazy val funcParams = ExpressionCodegen.getExpressionInputParams(ctx, this)

if (eval.code.trim.length > 1024 && funcParams.isDefined) {
val setIsNull = if (eval.isNull != "false" && eval.isNull != "true") {
val globalIsNull = ctx.freshName("globalIsNull")
ctx.addMutableState(ctx.JAVA_BOOLEAN, globalIsNull)
Expand All @@ -132,17 +156,20 @@ abstract class Expression extends TreeNode[Expression] {
val newValue = ctx.freshName("value")

val funcName = ctx.freshName(nodeName)
val callParams = funcParams.map(_._1.mkString(", ")).get
val declParams = funcParams.map(_._2.mkString(", ")).get

val funcFullName = ctx.addNewFunction(funcName,
s"""
|private $javaType $funcName(InternalRow ${ctx.INPUT_ROW}) {
|private $javaType $funcName($declParams) {
| ${eval.code.trim}
| $setIsNull
| return ${eval.value};
|}
""".stripMargin)

eval.value = newValue
eval.code = s"$javaType $newValue = $funcFullName(${ctx.INPUT_ROW});"
eval.code = s"$javaType $newValue = $funcFullName($callParams);"
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,23 @@ import org.apache.spark.util.{ParentClassLoader, Utils}
* to null.
* @param value A term for a (possibly primitive) value of the result of the evaluation. Not
* valid if `isNull` is set to `true`.
* @param inputRow A term that holds the input row name when generating this code.
* @param inputVars A list of [[ExprInputVar]] that holds input variables when generating this code.
*/
case class ExprCode(var code: String, var isNull: String, var value: String)
case class ExprCode(
var code: String,
var isNull: String,
var value: String,
var inputRow: String = null,
var inputVars: Seq[ExprInputVar] = Seq.empty)

/**
* Represents an input variable [[ExprCode]] to an evaluation of an [[Expression]].
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add parameter doc

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

*
* @param expr The expression that is evaluated to the input variable.
* @param exprCode The [[ExprCode]] that represents the evaluation result for the input variable.
*/
case class ExprInputVar(expr: Expression, exprCode: ExprCode)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so the expr is always BoundReference?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually all we need is dataType and nullable, can we just define

case class ExprInputVar(exprCode: ExprCode, dataType: DataType, nullable: Boolean)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I also think about this before. Just thinking taking an expression is simpler. I will change it.


/**
* State used for subexpression elimination.
Expand Down Expand Up @@ -979,7 +994,11 @@ class CodegenContext {
val expr = e.head
// Generate the code for this expression tree.
val eval = expr.genCode(this)
val state = SubExprEliminationState(eval.isNull, eval.value)
val state = if (expr.nullable) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we still need it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, can remove it.

SubExprEliminationState(eval.isNull, eval.value)
} else {
SubExprEliminationState("false", eval.value)
}
e.foreach(subExprEliminationExprs.put(_, state))
eval.code.trim
}
Expand All @@ -1001,16 +1020,25 @@ class CodegenContext {
commonExprs.foreach { e =>
val expr = e.head
val fnName = freshName("evalExpr")
val isNull = s"${fnName}IsNull"
val isNull = if (expr.nullable) {
s"${fnName}IsNull"
} else {
""
}
val value = s"${fnName}Value"

// Generate the code for this expression tree and wrap it in a function.
val eval = expr.genCode(this)
val nullValue = if (expr.nullable) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: assignIsNull

s"$isNull = ${eval.isNull};"
} else {
""
}
val fn =
s"""
|private void $fnName(InternalRow $INPUT_ROW) {
| ${eval.code.trim}
| $isNull = ${eval.isNull};
| $nullValue
| $value = ${eval.value};
|}
""".stripMargin
Expand All @@ -1028,12 +1056,17 @@ class CodegenContext {
// 2. Less code.
// Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with
// at least two nodes) as the cost of doing it is expected to be low.
addMutableState(JAVA_BOOLEAN, isNull, s"$isNull = false;")
addMutableState(javaType(expr.dataType), value,
s"$value = ${defaultValue(expr.dataType)};")
if (expr.nullable) {
addMutableState(JAVA_BOOLEAN, isNull)
}
addMutableState(javaType(expr.dataType), value)

subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);"
val state = SubExprEliminationState(isNull, value)
val state = if (expr.nullable) {
SubExprEliminationState(isNull, value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we still need it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is still needed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because here it is not SubExprEliminationState(ev.isNull, value).

} else {
SubExprEliminationState("false", value)
}
e.foreach(subExprEliminationExprs.put(_, state))
}
}
Expand Down
Loading