-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-22600][SQL] Fix 64kb limit for deeply nested expressions under wholestage codegen #19813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
34abc22
e0d111e
65d07d5
9f848be
57b1add
d051f9e
6368702
8c7f749
7f00515
777eb7a
7230997
fd87e9b
57a9fb7
0d358d6
aa3db2e
429afba
48add65
9443011
2f4014f
655917c
c083a79
1251dfa
c4f15f7
f35974e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -105,6 +105,11 @@ abstract class Expression extends TreeNode[Expression] { | |
| val isNull = ctx.freshName("isNull") | ||
| val value = ctx.freshName("value") | ||
| val eval = doGenCode(ctx, ExprCode("", isNull, value)) | ||
|
|
||
| // Records current input row and variables of this expression. | ||
| eval.inputRow = ctx.INPUT_ROW | ||
| eval.inputVars = findInputVars(ctx, eval) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shall we do one more thing here?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, we can be more aggressive and do
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. Let me try it in next commit.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The aggressive version (
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| reduceCodeSize(ctx, eval) | ||
| if (eval.code.nonEmpty) { | ||
| // Add `this` in the comment. | ||
|
|
@@ -115,9 +120,28 @@ abstract class Expression extends TreeNode[Expression] { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns the input variables to this expression. | ||
| * | ||
| * When `ctx.currentVars` is non-null, gathers (via `collect` on this expression) every | ||
| * [[BoundReference]] whose slot in `ctx.currentVars` is non-null, and pairs the reference | ||
| * with the [[ExprCode]] stored in that slot as an [[ExprInputVar]]. | ||
| * When `ctx.currentVars` is null, an empty sequence is returned. | ||
| * | ||
| * @param ctx the codegen context whose `currentVars` is consulted. | ||
| * @param eval the generated code of this expression; it is not read by this method. | ||
| * @return the [[ExprInputVar]]s found, or `Seq.empty` when `ctx.currentVars` is null. | ||
| */ | ||
| private def findInputVars(ctx: CodegenContext, eval: ExprCode): Seq[ExprInputVar] = { | ||
| if (ctx.currentVars != null) { | ||
| this.collect { | ||
| case b @ BoundReference(ordinal, _, _) if ctx.currentVars(ordinal) != null => | ||
| ExprInputVar(b, exprCode = ctx.currentVars(ordinal)) | ||
| } | ||
| } else { | ||
| Seq.empty | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * In order to prevent 64kb compile error, reducing the size of generated codes by | ||
| * separating it into a function if the size exceeds a threshold. | ||
| */ | ||
| private def reduceCodeSize(ctx: CodegenContext, eval: ExprCode): Unit = { | ||
| // TODO: support whole stage codegen too | ||
| if (eval.code.trim.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { | ||
| lazy val funcParams = ExpressionCodegen.getExpressionInputParams(ctx, this) | ||
|
|
||
| if (eval.code.trim.length > 1024 && funcParams.isDefined) { | ||
| val setIsNull = if (eval.isNull != "false" && eval.isNull != "true") { | ||
| val globalIsNull = ctx.freshName("globalIsNull") | ||
| ctx.addMutableState(ctx.JAVA_BOOLEAN, globalIsNull) | ||
|
|
@@ -132,17 +156,20 @@ abstract class Expression extends TreeNode[Expression] { | |
| val newValue = ctx.freshName("value") | ||
|
|
||
| val funcName = ctx.freshName(nodeName) | ||
| val callParams = funcParams.map(_._1.mkString(", ")).get | ||
| val declParams = funcParams.map(_._2.mkString(", ")).get | ||
|
|
||
| val funcFullName = ctx.addNewFunction(funcName, | ||
| s""" | ||
| |private $javaType $funcName(InternalRow ${ctx.INPUT_ROW}) { | ||
| |private $javaType $funcName($declParams) { | ||
| | ${eval.code.trim} | ||
| | $setIsNull | ||
| | return ${eval.value}; | ||
| |} | ||
| """.stripMargin) | ||
|
|
||
| eval.value = newValue | ||
| eval.code = s"$javaType $newValue = $funcFullName(${ctx.INPUT_ROW});" | ||
| eval.code = s"$javaType $newValue = $funcFullName($callParams);" | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,8 +55,23 @@ import org.apache.spark.util.{ParentClassLoader, Utils} | |
| * to null. | ||
| * @param value A term for a (possibly primitive) value of the result of the evaluation. Not | ||
| * valid if `isNull` is set to `true`. | ||
| * @param inputRow A term that holds the input row name when generating this code. | ||
| * @param inputVars A list of [[ExprInputVar]] that holds input variables when generating this code. | ||
| */ | ||
| case class ExprCode(var code: String, var isNull: String, var value: String) | ||
| case class ExprCode( | ||
| var code: String, | ||
| var isNull: String, | ||
| var value: String, | ||
| var inputRow: String = null, | ||
| var inputVars: Seq[ExprInputVar] = Seq.empty) | ||
|
|
||
| /** | ||
| * Represents an input variable [[ExprCode]] to an evaluation of an [[Expression]]. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add parameter doc
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added. |
||
| * | ||
| * @param expr The expression that is evaluated to the input variable. | ||
| * @param exprCode The [[ExprCode]] that represents the evaluation result for the input variable. | ||
| */ | ||
| case class ExprInputVar(expr: Expression, exprCode: ExprCode) | ||
|
||
|
|
||
| /** | ||
| * State used for subexpression elimination. | ||
|
|
@@ -979,7 +994,11 @@ class CodegenContext { | |
| val expr = e.head | ||
| // Generate the code for this expression tree. | ||
| val eval = expr.genCode(this) | ||
| val state = SubExprEliminationState(eval.isNull, eval.value) | ||
| val state = if (expr.nullable) { | ||
|
||
| SubExprEliminationState(eval.isNull, eval.value) | ||
| } else { | ||
| SubExprEliminationState("false", eval.value) | ||
| } | ||
| e.foreach(subExprEliminationExprs.put(_, state)) | ||
| eval.code.trim | ||
| } | ||
|
|
@@ -1001,16 +1020,25 @@ class CodegenContext { | |
| commonExprs.foreach { e => | ||
| val expr = e.head | ||
| val fnName = freshName("evalExpr") | ||
| val isNull = s"${fnName}IsNull" | ||
| val isNull = if (expr.nullable) { | ||
| s"${fnName}IsNull" | ||
| } else { | ||
| "" | ||
| } | ||
| val value = s"${fnName}Value" | ||
|
|
||
| // Generate the code for this expression tree and wrap it in a function. | ||
| val eval = expr.genCode(this) | ||
| val nullValue = if (expr.nullable) { | ||
|
||
| s"$isNull = ${eval.isNull};" | ||
| } else { | ||
| "" | ||
| } | ||
| val fn = | ||
| s""" | ||
| |private void $fnName(InternalRow $INPUT_ROW) { | ||
| | ${eval.code.trim} | ||
| | $isNull = ${eval.isNull}; | ||
| | $nullValue | ||
| | $value = ${eval.value}; | ||
| |} | ||
| """.stripMargin | ||
|
|
@@ -1028,12 +1056,17 @@ class CodegenContext { | |
| // 2. Less code. | ||
| // Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with | ||
| // at least two nodes) as the cost of doing it is expected to be low. | ||
| addMutableState(JAVA_BOOLEAN, isNull, s"$isNull = false;") | ||
| addMutableState(javaType(expr.dataType), value, | ||
| s"$value = ${defaultValue(expr.dataType)};") | ||
| if (expr.nullable) { | ||
| addMutableState(JAVA_BOOLEAN, isNull) | ||
| } | ||
| addMutableState(javaType(expr.dataType), value) | ||
|
|
||
| subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);" | ||
| val state = SubExprEliminationState(isNull, value) | ||
| val state = if (expr.nullable) { | ||
| SubExprEliminationState(isNull, value) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we still need it?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is still needed.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because here it is not |
||
| } else { | ||
| SubExprEliminationState("false", value) | ||
| } | ||
| e.foreach(subExprEliminationExprs.put(_, state)) | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this kind of optimization is already done in https://github.com/apache/spark/pull/19813/files#diff-b3ebf3b40b9d4b6e98bb29ac8bb5aadaR108
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh, right.