Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d441794
update API of addMutableState
kiszk Nov 24, 2017
870d106
eliminate initialization with default value
kiszk Nov 25, 2017
24d7087
allocate a global Java array to store a lot of mutable state in a class
kiszk Nov 26, 2017
3eb5842
fix scala style error
kiszk Nov 26, 2017
074d711
fix test failure of ExpressionEncoderSuite.NestedArray
kiszk Nov 27, 2017
eafa3f8
rebase with master
kiszk Nov 30, 2017
90d15f3
address review comments
kiszk Nov 30, 2017
c456c07
add useFreshname parameter to addMutableState
kiszk Nov 30, 2017
9ca5ab3
rebase with master
kiszk Nov 30, 2017
5b36c61
fix test failures
kiszk Dec 1, 2017
fd51d75
drop to creat a loop for initialization
kiszk Dec 7, 2017
effe918
fix test failure
kiszk Dec 8, 2017
9df109c
update comments
kiszk Dec 8, 2017
634d494
address review comment
kiszk Dec 10, 2017
d3438fd
address review comment
kiszk Dec 12, 2017
f4f3754
address review comment
kiszk Dec 12, 2017
f1e1fca
address review comments except test case
kiszk Dec 13, 2017
0937ef2
rebase with master
kiszk Dec 13, 2017
4bfcc1a
Do not use compaction as possible for frequently-accessed variable
kiszk Dec 13, 2017
49119a9
exclude mutable state from argument list for ExpressionCodegn
kiszk Dec 13, 2017
24f49c5
fix test failures
kiszk Dec 14, 2017
15e967e
address review comments
kiszk Dec 14, 2017
d6c1a97
address review comments
kiszk Dec 14, 2017
a9d40e9
address review comments
kiszk Dec 14, 2017
31914c0
address review comments
kiszk Dec 15, 2017
0e45c19
address review comments
kiszk Dec 19, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
update API of addMutableState
  • Loading branch information
kiszk committed Dec 14, 2017
commit d44179403857fafeddd8110c4ce6961a5231ef0c
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ abstract class Expression extends TreeNode[Expression] {
// TODO: support whole stage codegen too
if (eval.code.trim.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) {
val setIsNull = if (eval.isNull != "false" && eval.isNull != "true") {
val globalIsNull = ctx.freshName("globalIsNull")
ctx.addMutableState(ctx.JAVA_BOOLEAN, globalIsNull)
val globalIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "globalIsNull")
val localIsNull = eval.isNull
eval.isNull = globalIsNull
s"$globalIsNull = $localIsNull;"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,8 @@ case class MonotonicallyIncreasingID() extends LeafExpression with Nondeterminis
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val countTerm = ctx.freshName("count")
val partitionMaskTerm = ctx.freshName("partitionMask")
ctx.addMutableState(ctx.JAVA_LONG, countTerm)
ctx.addMutableState(ctx.JAVA_LONG, partitionMaskTerm)
val countTerm = ctx.addMutableState(ctx.JAVA_LONG, "count")
val partitionMaskTerm = ctx.addMutableState(ctx.JAVA_LONG, "partitionMask")
ctx.addPartitionInitializationStatement(s"$countTerm = 0L;")
ctx.addPartitionInitializationStatement(s"$partitionMaskTerm = ((long) partitionIndex) << 33;")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ case class SparkPartitionID() extends LeafExpression with Nondeterministic {
override protected def evalInternal(input: InternalRow): Int = partitionId

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val idTerm = ctx.freshName("partitionId")
ctx.addMutableState(ctx.JAVA_INT, idTerm)
val idTerm = ctx.addMutableState(ctx.JAVA_INT, "partitionId")
ctx.addPartitionInitializationStatement(s"$idTerm = partitionIndex;")
ev.copy(code = s"final ${ctx.javaType(dataType)} ${ev.value} = $idTerm;", isNull = "false")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -602,8 +602,7 @@ case class Least(children: Seq[Expression]) extends Expression {

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val evalChildren = children.map(_.genCode(ctx))
val tmpIsNull = ctx.freshName("leastTmpIsNull")
ctx.addMutableState(ctx.JAVA_BOOLEAN, tmpIsNull)
val tmpIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "leastTmpIsNull")
val evals = evalChildren.map(eval =>
s"""
|${eval.code}
Expand Down Expand Up @@ -683,8 +682,7 @@ case class Greatest(children: Seq[Expression]) extends Expression {

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val evalChildren = children.map(_.genCode(ctx))
val tmpIsNull = ctx.freshName("greatestTmpIsNull")
ctx.addMutableState(ctx.JAVA_BOOLEAN, tmpIsNull)
val tmpIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "greatestTmpIsNull")
val evals = evalChildren.map(eval =>
s"""
|${eval.code}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,24 @@ class CodegenContext {
* the list of default imports available.
* Also, generic type arguments are accepted but ignored.
* @param variableName Name of the field.
* @param initCode The statement(s) to put into the init() method to initialize this field.
* @param codeFunctions Function includes statement(s) to put into the init() method to
* initialize this field. An argument is the name of the mutable state variable
* If left blank, the field will be default-initialized.
* @param inline whether the declaration and initialization code may be inlined rather than
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: forceInline?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have a clear rule when to force inline it?

* compacted. If true, the name is not changed
* @return the name of the mutable state variable, which is either the original name if the
* variable is inlined to the class, or an array access if the variable is to be stored
* in an array of variables of the same type and initialization.
*/
def addMutableState(javaType: String, variableName: String, initCode: String = ""): Unit = {
mutableStates += ((javaType, variableName, initCode))
def addMutableState(
javaType: String,
variableName: String,
codeFunctions: String => String = _ => "",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

initFunc?

inline: Boolean = false): String = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also a bit scared about using regex to match simple assignment. Maybe we should have 2 versions of addMutableState: one provides a simple initial value, one provides arbitrary initializing code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of possible approaches. Let me count the number of calls of each type.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we go this way, what about creating a MutableStateBuilder then, with methods like withSimpleInitialValue taking a string and withComplexInit taking a function or something like that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed there are only about 15 complex assignment cases. I will specify inline = true for such as case and eliminate conditions using regex.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kiszk thanks! A big 👍 for eliminating the regexps! :) I'd prefer something more explicit about the reason why it is inlined, like the solutions proposed above. I think readability would be better. WDYT?

val newVariableName = if (!inline) freshName(variableName) else variableName
val initCode = codeFunctions(newVariableName)
mutableStates += ((javaType, newVariableName, initCode))
newVariableName
}

/**
Expand All @@ -176,8 +189,7 @@ class CodegenContext {
* data types like: UTF8String, ArrayData, MapData & InternalRow.
*/
def addBufferedState(dataType: DataType, variableName: String, initCode: String): ExprCode = {
val value = freshName(variableName)
addMutableState(javaType(dataType), value, "")
val value = addMutableState(javaType(dataType), variableName)
val code = dataType match {
case StringType => s"$value = $initCode.clone();"
case _: StructType | _: ArrayType | _: MapType => s"$value = $initCode.copy();"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,37 +61,34 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
case (ev, i) =>
val e = expressions(i)
if (e.nullable) {
val isNull = s"isNull_$i"
val value = s"value_$i"
ctx.addMutableState(ctx.JAVA_BOOLEAN, isNull, s"$isNull = true;")
ctx.addMutableState(ctx.javaType(e.dataType), value,
s"$value = ${ctx.defaultValue(e.dataType)};")
s"""
val isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, s"isNull_$i", v => s"$v = true;")
val value = ctx.addMutableState(ctx.javaType(e.dataType), s"value_$i",
v => s"$v = ${ctx.defaultValue(e.dataType)};")
(s"""
${ev.code}
$isNull = ${ev.isNull};
$value = ${ev.value};
"""
""", isNull, value, i)
} else {
val value = s"value_$i"
ctx.addMutableState(ctx.javaType(e.dataType), value,
s"$value = ${ctx.defaultValue(e.dataType)};")
s"""
val value = ctx.addMutableState(ctx.javaType(e.dataType), s"value_$i",
v => s"$v = ${ctx.defaultValue(e.dataType)};")
(s"""
${ev.code}
$value = ${ev.value};
"""
""", ev.isNull, value, i)
}
}

// Evaluate all the subexpressions.
val evalSubexpr = ctx.subexprFunctions.mkString("\n")

val updates = validExpr.zip(index).map {
case (e, i) =>
val ev = ExprCode("", s"isNull_$i", s"value_$i")
val updates = validExpr.zip(projectionCodes).map {
case (e, (_, isNull, value, i)) =>
val ev = ExprCode("", isNull, value)
ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable)
}

val allProjections = ctx.splitExpressionsWithCurrentInputs(projectionCodes)
val allProjections = ctx.splitExpressionsWithCurrentInputs(projectionCodes.map(_._1))
val allUpdates = ctx.splitExpressionsWithCurrentInputs(updates)

val codeBody = s"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
bufferHolder: String,
isTopLevel: Boolean = false): String = {
val rowWriterClass = classOf[UnsafeRowWriter].getName
val rowWriter = ctx.freshName("rowWriter")
ctx.addMutableState(rowWriterClass, rowWriter,
s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});")
val rowWriter = ctx.addMutableState(rowWriterClass, "rowWriter",
v => s"$v = new $rowWriterClass($bufferHolder, ${inputs.length});")

val resetWriter = if (isTopLevel) {
// For top level row writer, it always writes to the beginning of the global buffer holder,
Expand Down Expand Up @@ -186,9 +185,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
// Puts `input` in a local variable to avoid to re-evaluate it if it's a statement.
val tmpInput = ctx.freshName("tmpInput")
val arrayWriterClass = classOf[UnsafeArrayWriter].getName
val arrayWriter = ctx.freshName("arrayWriter")
ctx.addMutableState(arrayWriterClass, arrayWriter,
s"$arrayWriter = new $arrayWriterClass();")
val arrayWriter = ctx.addMutableState(arrayWriterClass, "arrayWriter",
v => s"$v = new $arrayWriterClass();")
val numElements = ctx.freshName("numElements")
val index = ctx.freshName("index")

Expand Down Expand Up @@ -318,13 +316,12 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
case _ => true
}

val result = ctx.freshName("result")
ctx.addMutableState("UnsafeRow", result, s"$result = new UnsafeRow(${expressions.length});")
val result = ctx.addMutableState("UnsafeRow", "result",
v => s"$v = new UnsafeRow(${expressions.length});")

val holder = ctx.freshName("holder")
val holderClass = classOf[BufferHolder].getName
ctx.addMutableState(holderClass, holder,
s"$holder = new $holderClass($result, ${numVarLenFields * 32});")
val holder = ctx.addMutableState(holderClass, "holder",
v => s"$v = new $holderClass($result, ${numVarLenFields * 32});")

val resetBufferHolder = if (numVarLenFields == 0) {
""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,7 @@ case class CaseWhen(
// It is initialized to `NOT_MATCHED`, and if it's set to `HAS_NULL` or `HAS_NONNULL`,
// We won't go on anymore on the computation.
val resultState = ctx.freshName("caseWhenResultState")
val tmpResult = ctx.freshName("caseWhenTmpResult")
ctx.addMutableState(ctx.javaType(dataType), tmpResult)
val tmpResult = ctx.addMutableState(ctx.javaType(dataType), "caseWhenTmpResult")

// these blocks are meant to be inside a
// do {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -442,9 +442,9 @@ case class DayOfWeek(child: Expression) extends UnaryExpression with ImplicitCas
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, time => {
val cal = classOf[Calendar].getName
val c = ctx.freshName("cal")
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
ctx.addMutableState(cal, c, s"""$c = $cal.getInstance($dtu.getTimeZone("UTC"));""")
val c = ctx.addMutableState(cal, "cal",
v => s"""$v = $cal.getInstance($dtu.getTimeZone("UTC"));""")
s"""
$c.setTimeInMillis($time * 1000L * 3600L * 24L);
${ev.value} = $c.get($cal.DAY_OF_WEEK);
Expand Down Expand Up @@ -484,13 +484,12 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, time => {
val cal = classOf[Calendar].getName
val c = ctx.freshName("cal")
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
ctx.addMutableState(cal, c,
s"""
$c = $cal.getInstance($dtu.getTimeZone("UTC"));
$c.setFirstDayOfWeek($cal.MONDAY);
$c.setMinimalDaysInFirstWeek(4);
val c = ctx.addMutableState(cal, "cal",
v => s"""
$v = $cal.getInstance($dtu.getTimeZone("UTC"));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's use the new multiline string style here

$v.setFirstDayOfWeek($cal.MONDAY);
$v.setMinimalDaysInFirstWeek(4);
""")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you forget the stripMargin, now the | will be in the code and fail to compile...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch, thanks

s"""
$c.setTimeInMillis($time * 1000L * 3600L * 24L);
Expand Down Expand Up @@ -1014,12 +1013,12 @@ case class FromUTCTimestamp(left: Expression, right: Expression)
|long ${ev.value} = 0;
""".stripMargin)
} else {
val tzTerm = ctx.freshName("tz")
val utcTerm = ctx.freshName("utc")
val tzClass = classOf[TimeZone].getName
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $dtu.getTimeZone("$tz");""")
ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $dtu.getTimeZone("UTC");""")
val tzTerm = ctx.addMutableState(tzClass, "tz",
v => s"""$v = $dtu.getTimeZone("$tz");""")
val utcTerm = ctx.addMutableState(tzClass, "utc",
v => s"""$v = $dtu.getTimeZone("UTC");""")
val eval = left.genCode(ctx)
ev.copy(code = s"""
|${eval.code}
Expand Down Expand Up @@ -1190,12 +1189,12 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
|long ${ev.value} = 0;
""".stripMargin)
} else {
val tzTerm = ctx.freshName("tz")
val utcTerm = ctx.freshName("utc")
val tzClass = classOf[TimeZone].getName
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $dtu.getTimeZone("$tz");""")
ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $dtu.getTimeZone("UTC");""")
val tzTerm = ctx.addMutableState(tzClass, "tz",
v => s"""$v = $dtu.getTimeZone("$tz");""")
val utcTerm = ctx.addMutableState(tzClass, "utc",
v => s"""$v = $dtu.getTimeZone("UTC");""")
val eval = left.genCode(ctx)
ev.copy(code = s"""
|${eval.code}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ case class Stack(children: Seq[Expression]) extends Generator {

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
// Rows - we write these into an array.
val rowData = ctx.freshName("rows")
ctx.addMutableState("InternalRow[]", rowData, s"$rowData = new InternalRow[$numRows];")
val rowData = ctx.addMutableState("InternalRow[]", "rows",
v => s"$v = new InternalRow[$numRows];")
val values = children.tail
val dataTypes = values.take(numFields).map(_.dataType)
val code = ctx.splitExpressionsWithCurrentInputs(Seq.tabulate(numRows) { row =>
Expand All @@ -212,12 +212,12 @@ case class Stack(children: Seq[Expression]) extends Generator {
s"${eval.code}\n$rowData[$row] = ${eval.value};"
})

// Create the collection.
// Create the collection. Inline to outer class.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, since we have dependency with new InternalRow[$numRows]. We have to inline new InternalRow[$numRows], too.

val wrapperClass = classOf[mutable.WrappedArray[_]].getName
ctx.addMutableState(
s"$wrapperClass<InternalRow>",
ev.value,
s"${ev.value} = $wrapperClass$$.MODULE$$.make($rowData);")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can localize the global variable ev.value here to save one global variable slot.

Copy link
Member Author

@kiszk kiszk Dec 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do it in another PR after merging this.

_ => s"${ev.value} = $wrapperClass$$.MODULE$$.make($rowData);", inline = true)
ev.copy(code = code, isNull = "false")
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ case class Coalesce(children: Seq[Expression]) extends Expression {
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val tmpIsNull = ctx.freshName("coalesceTmpIsNull")
ctx.addMutableState(ctx.JAVA_BOOLEAN, tmpIsNull)
val tmpIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "coalesceTmpIsNull")

// all the evals are meant to be in a do { ... } while (false); loop
val evals = children.map { e =>
Expand Down
Loading