From 41a591a086d732fe83fe6bdf6f34fae11d08000b Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 22 Nov 2017 16:47:45 +0000 Subject: [PATCH 01/12] initial commit for cast --- .../spark/sql/catalyst/expressions/Cast.scala | 32 ++++++++++++------- .../sql/catalyst/expressions/CastSuite.scala | 20 ++++++++++++ 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 8cafaef61c7d..f14e6ead2e11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -799,9 +799,11 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") + val wrapper = "intWrapper" + if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { + ctx.addMutableState("UTF8String.IntWrapper", wrapper, + s"$wrapper = new UTF8String.IntWrapper();") + } (c, evPrim, evNull) => s""" if ($c.toByte($wrapper)) { @@ -826,9 +828,11 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") + val wrapper = "intWrapper" + if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { + ctx.addMutableState("UTF8String.IntWrapper", wrapper, + s"$wrapper = new UTF8String.IntWrapper();") + } (c, evPrim, evNull) => s""" if ($c.toShort($wrapper)) { @@ -851,9 +855,11 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToIntCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") + val wrapper = "intWrapper" + if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { + ctx.addMutableState("UTF8String.IntWrapper", wrapper, + s"$wrapper = new UTF8String.IntWrapper();") + } (c, evPrim, evNull) => s""" if ($c.toInt($wrapper)) { @@ -876,9 +882,11 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToLongCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.LongWrapper", wrapper, - s"$wrapper = new UTF8String.LongWrapper();") + val wrapper = "longWrapper" + if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { + ctx.addMutableState("UTF8String.LongWrapper", wrapper, + s"$wrapper = new UTF8String.LongWrapper();") + } (c, evPrim, evNull) => s""" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 7837d6529d12..50ca2494dda0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -845,4 +845,24 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { val outputOuter = Row.fromSeq((1 to N).map(_ => outputInner)) checkEvaluation(cast(Literal.create(inputOuter, fromOuter), toOuter), outputOuter) } + + test("SPARK-22570: should not create a lot of instance variables") { + val N = 30000 + + val from1 = new StructType( + (1 to N).map(i => StructField(s"s$i", StringType)).toArray) + val to1 = new StructType( + (1 to N).map(i => StructField(s"i$i", IntegerType)).toArray) + val input1 = Row.fromSeq((1 to N).map(i => i.toString)) + val output1 = Row.fromSeq((1 to N)) + checkEvaluation(cast(Literal.create(input1, from1), to1), output1) + + val from2 = new StructType( + (1 to N).map(i => StructField(s"s$i", StringType)).toArray) + val to2 = new StructType( + (1 to N).map(i => StructField(s"i$i", LongType)).toArray) + val input2 = Row.fromSeq((1 to N).map(i => i.toString)) + val output2 = Row.fromSeq((1 to N).map(i => i.toLong)) + checkEvaluation(cast(Literal.create(input2, from2), to2), output2) + } } From de5cd384b81e94a320f4a9c64a3263c6beec368c Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 22 Nov 2017 16:48:08 +0000 Subject: [PATCH 02/12] initial commit for regexpreplace --- .../expressions/regexpExpressions.scala | 8 ++++--- .../expressions/RegexpExpressionsSuite.scala | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index d0d663f63f5d..1321c4724bda 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -322,7 +322,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio val termLastReplacement = ctx.freshName("lastReplacement") val termLastReplacementInUTF8 = ctx.freshName("lastReplacementInUTF8") - val termResult = ctx.freshName("result") + val termResult = "termResult" val classNamePattern = classOf[Pattern].getCanonicalName val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName @@ -334,8 +334,10 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;") ctx.addMutableState("UTF8String", termLastReplacementInUTF8, s"${termLastReplacementInUTF8} = null;") - ctx.addMutableState(classNameStringBuffer, - termResult, s"${termResult} = new $classNameStringBuffer();") + if (!ctx.mutableStates.exists(s => s._1 == termResult)) { + ctx.addMutableState(classNameStringBuffer, + termResult, s"${termResult} = new $classNameStringBuffer();") + } val setEvNotNull = if (nullable) { s"${ev.isNull} = false;" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 1ce150e09198..f7a0354f1673 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeGenerator, CodegenContext} import org.apache.spark.sql.types.{IntegerType, StringType} /** @@ -178,6 +179,29 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(nonNullExpr, "num-num", row1) } + test("SPARK-22570: should not create a lot of instance variables") { + val expr = RegExpReplace(Literal("100"), Literal("(\\d+)"), Literal("num")) + val ctx = new CodegenContext + val codes = (1 to 16000).map(_ => expr.genCode(ctx).code) + val eval = ctx.splitExpressions(ctx.INPUT_ROW, codes) + val codeBody = s""" + public RegexpExpressionsTest generate(Object[] references) { + return new RegexpExpressionsTest(references); + } + class RegexpExpressionsTest { + Object[] references; + ${ctx.declareMutableStates()} + public RegexpExpressionsTest(Object[] references) { + ${ctx.initMutableStates()} + } + public void apply(InternalRow ${ctx.INPUT_ROW}) { + ${eval} + } + ${ctx.declareAddedFunctions()} + }""" + CodeGenerator.compile(new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) + } + test("RegexExtract") { val row1 = create_row("100-200", "(\\d+)-(\\d+)", 1) val row2 = create_row("100-200", "(\\d+)-(\\d+)", 2) From 1046e1d440fc3b5cb1d8026cbddac415cb95e171 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 22 Nov 2017 16:48:30 +0000 Subject: [PATCH 03/12] createarray --- .../expressions/complexTypeCreator.scala | 25 +++++++++++-------- .../optimizer/complexTypesSuite.scala | 23 +++++++++++++++++ 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 57a7f2e20773..2cad3070ac0d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -87,31 +87,36 @@ private [sql] object GenArrayData { elementType: DataType, elementsCode: Seq[ExprCode], isMapKey: Boolean): (String, Seq[String], String, String) = { - val arrayName = ctx.freshName("array") + val arrayName = "array" val arrayDataName = ctx.freshName("arrayData") val numElements = elementsCode.length if (!ctx.isPrimitiveType(elementType)) { val genericArrayClass = classOf[GenericArrayData].getName - ctx.addMutableState("Object[]", arrayName, - s"$arrayName = new Object[$numElements];") + if (!ctx.mutableStates.exists(s => s._1 == arrayName)) { + ctx.addMutableState("Object[]", arrayName) + } val assignments = elementsCode.zipWithIndex.map { case (eval, i) => - val isNullAssignment = if (!isMapKey) { - s"$arrayName[$i] = null;" + val isNullAssignment = if (eval.isNull == "false") { + "" } else { - "throw new RuntimeException(\"Cannot use null as map key!\");" + if (!isMapKey) { + s"$arrayName[$i] = null;" + } else { + "throw new RuntimeException(\"Cannot use null as map key!\");" + } } eval.code + s""" - if (${eval.isNull}) { - $isNullAssignment - } else { + if (!${eval.isNull}) { $arrayName[$i] = ${eval.value}; + } else { + $isNullAssignment } """ } - ("", + (s"$arrayName = new Object[$numElements];", assignments, s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);", arrayDataName) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 3634accf1ec2..a45e7c4e6ef3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeGenerator, CodegenContext} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -164,6 +165,28 @@ class ComplexTypesSuite extends PlanTest{ comparePlans(Optimizer execute query, expected) } + test("SPARK-22570: should not create a lot of instance variables") { + val ctx = new CodegenContext + val codes = (1 to 60000).map(i => CreateArray(Seq(Literal(s"$i"))).genCode(ctx).code) + val eval = ctx.splitExpressions(ctx.INPUT_ROW, codes) + val codeBody = s""" + public ComplexTypesTest generate(Object[] references) { + return new ComplexTypesTest(references); + } + class ComplexTypesTest { + Object[] references; + ${ctx.declareMutableStates()} + public ComplexTypesTest(Object[] references) { + ${ctx.initMutableStates()} + } + public void apply(InternalRow ${ctx.INPUT_ROW}) { + ${eval} + } + ${ctx.declareAddedFunctions()} + }""" + CodeGenerator.compile(new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) + } + test("simplify map ops") { val rel = relation .select( From 13e05b78e0cd2d0e178bb50c660b7f7ff4f50b8a Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 22 Nov 2017 17:48:07 +0000 Subject: [PATCH 04/12] fix scala style error --- .../spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala | 2 +- .../apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index f7a0354f1673..92cafbd4eb2d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeGenerator, CodegenContext} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodegenContext, CodeGenerator} import org.apache.spark.sql.types.{IntegerType, StringType} /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index a45e7c4e6ef3..79ab44bec3b3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeGenerator, CodegenContext} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodegenContext, CodeGenerator} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} import org.apache.spark.sql.catalyst.rules.RuleExecutor From 072cdd27faf5dce097cc83b5ba4bbc9e95ba7f6d Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 23 Nov 2017 01:38:32 +0000 Subject: [PATCH 05/12] fix conflict of variable names --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 2cad3070ac0d..8e3b0e300bfa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -87,11 +87,11 @@ private [sql] object GenArrayData { elementType: DataType, elementsCode: Seq[ExprCode], isMapKey: Boolean): (String, Seq[String], String, String) = { - val arrayName = "array" val arrayDataName = ctx.freshName("arrayData") val numElements = elementsCode.length if (!ctx.isPrimitiveType(elementType)) { + val arrayName = "arrayObject" val genericArrayClass = classOf[GenericArrayData].getName if (!ctx.mutableStates.exists(s => s._1 == arrayName)) { ctx.addMutableState("Object[]", arrayName) @@ -121,6 +121,7 @@ private [sql] object GenArrayData { s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);", arrayDataName) } else { + val arrayName = ctx.freshName("array") val unsafeArraySizeInBytes = UnsafeArrayData.calculateHeaderPortionInBytes(numElements) + ByteArrayMethods.roundNumberOfBytesToNearestWord(elementType.defaultSize * numElements) From d4e4b0769b2b8d0c69522c1e42890ad698506980 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 23 Nov 2017 15:57:15 +0000 Subject: [PATCH 06/12] address review comments --- .../spark/sql/catalyst/expressions/Cast.scala | 28 ++++++------------- .../expressions/codegen/CodeGenerator.scala | 17 +++++++++++ .../expressions/complexTypeCreator.scala | 20 +++++-------- .../expressions/regexpExpressions.scala | 6 ++-- 4 files changed, 34 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index f14e6ead2e11..faee16bd2779 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -799,13 +799,10 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = "intWrapper" - if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") - } + val wrapper = ctx.freshName("intWrapper") (c, evPrim, evNull) => s""" + UTF8String.IntWrapper $wrapper = new UTF8String.IntWrapper(); if ($c.toByte($wrapper)) { $evPrim = (byte) $wrapper.value; } else { @@ -828,13 +825,10 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = "intWrapper" - if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") - } + val wrapper = ctx.freshName("intWrapper") (c, evPrim, evNull) => s""" + UTF8String.IntWrapper $wrapper = new UTF8String.IntWrapper(); if ($c.toShort($wrapper)) { $evPrim = (short) $wrapper.value; } else { @@ -855,13 +849,10 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToIntCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = "intWrapper" - if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") - } + val wrapper = ctx.freshName("intWrapper") (c, evPrim, evNull) => s""" + UTF8String.IntWrapper $wrapper = new UTF8String.IntWrapper(); if ($c.toInt($wrapper)) { $evPrim = $wrapper.value; } else { @@ -882,14 +873,11 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToLongCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = "longWrapper" - if (!ctx.mutableStates.exists(s => s._1 == wrapper)) { - ctx.addMutableState("UTF8String.LongWrapper", wrapper, - s"$wrapper = new UTF8String.LongWrapper();") - } + val wrapper = ctx.freshName("longWrapper") (c, evPrim, evNull) => s""" + UTF8String.LongWrapper $wrapper = new UTF8String.LongWrapper(); if ($c.toLong($wrapper)) { $evPrim = $wrapper.value; } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 1645db12c53f..62f071ef8b66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -184,6 +184,23 @@ class CodegenContext { mutableStates += ((javaType, variableName, initCode)) } + /** + * Add a mutable state as a field to the generated class if the name has not been added + * + * @param javaType Java type of the field. + * @param variableName Name of the field. + * @param initCode The statement(s) to put into the init() method to initialize this field. + * If left blank, the field will be default-initialized. + */ + def reuseOrAddMutableState( + javaType: String, + variableName: String, + initCode: String = ""): Unit = { + if (!mutableStates.exists(s => s._1 == variableName)) { + addMutableState(javaType, variableName, initCode) + } + } + /** * Add buffer variable which stores data coming from an [[InternalRow]]. This methods guarantees * that the variable is safely stored, which is important for (potentially) byte array backed diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 8e3b0e300bfa..510c9f04b0ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -93,25 +93,19 @@ private [sql] object GenArrayData { if (!ctx.isPrimitiveType(elementType)) { val arrayName = "arrayObject" val genericArrayClass = classOf[GenericArrayData].getName - if (!ctx.mutableStates.exists(s => s._1 == arrayName)) { - ctx.addMutableState("Object[]", arrayName) - } + ctx.reuseOrAddMutableState("Object[]", arrayName) val assignments = elementsCode.zipWithIndex.map { case (eval, i) => - val isNullAssignment = if (eval.isNull == "false") { - "" + val isNullAssignment = if (!isMapKey) { + s"$arrayName[$i] = null;" } else { - if (!isMapKey) { - s"$arrayName[$i] = null;" - } else { - "throw new RuntimeException(\"Cannot use null as map key!\");" - } + "throw new RuntimeException(\"Cannot use null as map key!\");" } eval.code + s""" - if (!${eval.isNull}) { - $arrayName[$i] = ${eval.value}; - } else { + if (${eval.isNull}) { $isNullAssignment + } else { + $arrayName[$i] = ${eval.value}; } """ } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 1321c4724bda..580966f0237c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -334,10 +334,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;") ctx.addMutableState("UTF8String", termLastReplacementInUTF8, s"${termLastReplacementInUTF8} = null;") - if (!ctx.mutableStates.exists(s => s._1 == termResult)) { - ctx.addMutableState(classNameStringBuffer, - termResult, s"${termResult} = new $classNameStringBuffer();") - } + ctx.reuseOrAddMutableState(classNameStringBuffer, + termResult, s"${termResult} = new $classNameStringBuffer();") val setEvNotNull = if (nullable) { s"${ev.isNull} = false;" From 36a330ddf98360196df45d041255b8b70fab5450 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 29 Nov 2017 19:37:14 +0000 Subject: [PATCH 07/12] address review comments --- .../expressions/codegen/CodeGenerator.scala | 17 ----------- .../expressions/complexTypeCreator.scala | 30 +++++++++++-------- .../expressions/regexpExpressions.scala | 8 ++--- .../expressions/RegexpExpressionsSuite.scala | 23 ++++---------- .../optimizer/complexTypesSuite.scala | 20 ++----------- 5 files changed, 28 insertions(+), 70 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 62f071ef8b66..1645db12c53f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -184,23 +184,6 @@ class CodegenContext { mutableStates += ((javaType, variableName, initCode)) } - /** - * Add a mutable state as a field to the generated class if the name has not been added - * - * @param javaType Java type of the field. - * @param variableName Name of the field. - * @param initCode The statement(s) to put into the init() method to initialize this field. - * If left blank, the field will be default-initialized. - */ - def reuseOrAddMutableState( - javaType: String, - variableName: String, - initCode: String = ""): Unit = { - if (!mutableStates.exists(s => s._1 == variableName)) { - addMutableState(javaType, variableName, initCode) - } - } - /** * Add buffer variable which stores data coming from an [[InternalRow]]. This methods guarantees * that the variable is safely stored, which is important for (potentially) byte array backed diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 510c9f04b0ba..6974b69c1448 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -63,7 +63,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { val (preprocess, assigns, postprocess, arrayData) = GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false) ev.copy( - code = preprocess + ctx.splitExpressions(assigns) + postprocess, + code = preprocess + assigns + postprocess, value = arrayData, isNull = "false") } @@ -77,23 +77,22 @@ private [sql] object GenArrayData { * * @param ctx a [[CodegenContext]] * @param elementType data type of underlying array elements - * @param elementsCode a set of [[ExprCode]] for each element of an underlying array + * @param elementsCode concatenated set of [[ExprCode]] for each element of an underlying array * @param isMapKey if true, throw an exception when the element is null - * @return (code pre-assignments, assignments to each array elements, code post-assignments, - * arrayData name) + * @return (code pre-assignments, concatenated assignments to each array elements, + * code post-assignments, arrayData name) */ def genCodeToCreateArrayData( ctx: CodegenContext, elementType: DataType, elementsCode: Seq[ExprCode], - isMapKey: Boolean): (String, Seq[String], String, String) = { + isMapKey: Boolean): (String, String, String, String) = { val arrayDataName = ctx.freshName("arrayData") val numElements = elementsCode.length if (!ctx.isPrimitiveType(elementType)) { - val arrayName = "arrayObject" + val arrayName = ctx.freshName("arrayObject") val genericArrayClass = classOf[GenericArrayData].getName - ctx.reuseOrAddMutableState("Object[]", arrayName) val assignments = elementsCode.zipWithIndex.map { case (eval, i) => val isNullAssignment = if (!isMapKey) { @@ -109,9 +108,13 @@ private [sql] object GenArrayData { } """ } + val assignmentString = ctx.splitExpressions( + expressions = assignments, + funcName = "apply", + extraArguments = ("Object[]", arrayDataName) :: Nil) (s"$arrayName = new Object[$numElements];", - assignments, + assignmentString, s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);", arrayDataName) } else { @@ -120,7 +123,6 @@ private [sql] object GenArrayData { UnsafeArrayData.calculateHeaderPortionInBytes(numElements) + ByteArrayMethods.roundNumberOfBytesToNearestWord(elementType.defaultSize * numElements) val baseOffset = Platform.BYTE_ARRAY_OFFSET - ctx.addMutableState("UnsafeArrayData", arrayDataName) val primitiveValueTypeName = ctx.primitiveTypeName(elementType) val assignments = elementsCode.zipWithIndex.map { case (eval, i) => @@ -137,6 +139,10 @@ private [sql] object GenArrayData { } """ } + val assignmentString = ctx.splitExpressions( + expressions = assignments, + funcName = "apply", + extraArguments = ("UnsafeArrayData", arrayDataName) :: Nil) (s""" byte[] $arrayName = new byte[$unsafeArraySizeInBytes]; @@ -144,7 +150,7 @@ private [sql] object GenArrayData { Platform.putLong($arrayName, $baseOffset, $numElements); $arrayDataName.pointTo($arrayName, $baseOffset, $unsafeArraySizeInBytes); """, - assignments, + assignmentString, "", arrayDataName) } @@ -216,10 +222,10 @@ case class CreateMap(children: Seq[Expression]) extends Expression { s""" final boolean ${ev.isNull} = false; $preprocessKeyData - ${ctx.splitExpressions(assignKeys)} + $assignKeys $postprocessKeyData $preprocessValueData - ${ctx.splitExpressions(assignValues)} + $assignValues $postprocessValueData final MapData ${ev.value} = new $mapClass($keyArrayData, $valueArrayData); """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 580966f0237c..53d7096dd87d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -321,8 +321,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio val termLastReplacement = ctx.freshName("lastReplacement") val termLastReplacementInUTF8 = ctx.freshName("lastReplacementInUTF8") - - val termResult = "termResult" + val termResult = ctx.freshName("termResult") val classNamePattern = classOf[Pattern].getCanonicalName val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName @@ -334,8 +333,6 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;") ctx.addMutableState("UTF8String", termLastReplacementInUTF8, s"${termLastReplacementInUTF8} = null;") - ctx.reuseOrAddMutableState(classNameStringBuffer, - termResult, s"${termResult} = new $classNameStringBuffer();") val setEvNotNull = if (nullable) { s"${ev.isNull} = false;" @@ -355,7 +352,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio ${termLastReplacementInUTF8} = $rep.clone(); ${termLastReplacement} = ${termLastReplacementInUTF8}.toString(); } - ${termResult}.delete(0, ${termResult}.length()); + $classNameStringBuffer ${termResult} = new $classNameStringBuffer(); java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString()); while (${matcher}.find()) { @@ -363,6 +360,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio } ${matcher}.appendTail(${termResult}); ${ev.value} = UTF8String.fromString(${termResult}.toString()); + ${termResult} = null; $setEvNotNull """ }) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 92cafbd4eb2d..c49ff6a22cc6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -180,26 +180,13 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("SPARK-22570: should not create a lot of instance variables") { + val N = 16000 val expr = RegExpReplace(Literal("100"), Literal("(\\d+)"), Literal("num")) val ctx = new CodegenContext - val codes = (1 to 16000).map(_ => expr.genCode(ctx).code) - val eval = ctx.splitExpressions(ctx.INPUT_ROW, codes) - val codeBody = s""" - public RegexpExpressionsTest generate(Object[] references) { - return new RegexpExpressionsTest(references); - } - class RegexpExpressionsTest { - Object[] references; - ${ctx.declareMutableStates()} - public RegexpExpressionsTest(Object[] references) { - ${ctx.initMutableStates()} - } - public void apply(InternalRow ${ctx.INPUT_ROW}) { - ${eval} - } - ${ctx.declareAddedFunctions()} - }""" - CodeGenerator.compile(new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) + (1 to N).map(_ => expr.genCode(ctx).code) + // four global variables (lastRegex, pattern, lastReplacement, and lastReplacementInUTF8) + // are always required + assert(ctx.mutableStates.length == 4 * N) } test("RegexExtract") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 79ab44bec3b3..96235d9628b6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -167,24 +167,8 @@ class ComplexTypesSuite extends PlanTest{ test("SPARK-22570: should not create a lot of instance variables") { val ctx = new CodegenContext - val codes = (1 to 60000).map(i => CreateArray(Seq(Literal(s"$i"))).genCode(ctx).code) - val eval = ctx.splitExpressions(ctx.INPUT_ROW, codes) - val codeBody = s""" - public ComplexTypesTest generate(Object[] references) { - return new ComplexTypesTest(references); - } - class ComplexTypesTest { - Object[] references; - ${ctx.declareMutableStates()} - public ComplexTypesTest(Object[] references) { - ${ctx.initMutableStates()} - } - public void apply(InternalRow ${ctx.INPUT_ROW}) { - ${eval} - } - ${ctx.declareAddedFunctions()} - }""" - CodeGenerator.compile(new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) + (1 to 60000).map(i => CreateArray(Seq(Literal(s"$i"))).genCode(ctx).code) + assert(ctx.mutableStates.length == 0) } test("simplify map ops") { From 3b8459d4b50f2a923019c74c0800ceee2a9da7d5 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 30 Nov 2017 01:58:58 +0000 Subject: [PATCH 08/12] fix test failure of HiveThriftBinaryServerSuite.SPARK-4407 --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 6974b69c1448..587314a7a0b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -146,7 +146,7 @@ private [sql] object GenArrayData { (s""" byte[] $arrayName = new byte[$unsafeArraySizeInBytes]; - $arrayDataName = new UnsafeArrayData(); + UnsafeArrayData $arrayDataName = new UnsafeArrayData(); Platform.putLong($arrayName, $baseOffset, $numElements); $arrayDataName.pointTo($arrayName, $baseOffset, $unsafeArraySizeInBytes); """, From 420b10d1090133524adb18ba81a20fe901611144 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 30 Nov 2017 13:00:43 +0000 Subject: [PATCH 09/12] fix test failure of HiveThriftBinaryServerSuite.SPARK-4407 --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 587314a7a0b5..fc68bf478e1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -113,7 +113,7 @@ private [sql] object GenArrayData { funcName = "apply", extraArguments = ("Object[]", arrayDataName) :: Nil) - (s"$arrayName = new Object[$numElements];", + (s"Object[] $arrayName = new Object[$numElements];", assignmentString, s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);", arrayDataName) From cd4c696772750e6e9b79133446e590bc65a24ce4 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 30 Nov 2017 13:01:28 +0000 Subject: [PATCH 10/12] clear object pointer at the end of lifetime to avoid unnecessary GC --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index faee16bd2779..f4ecbdb8393a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -808,6 +808,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? (byte) 1 : (byte) 0;" @@ -834,6 +835,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? (short) 1 : (short) 0;" @@ -858,6 +860,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? 1 : 0;" @@ -883,6 +886,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? 1L : 0L;" From a455ac3bf5aa49a15ef80fc7807078eb41bbfe74 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 30 Nov 2017 13:01:58 +0000 Subject: [PATCH 11/12] address review comments --- .../sql/catalyst/expressions/CastSuite.scala | 24 +++++-------------- .../expressions/RegexpExpressionsSuite.scala | 8 +++---- .../optimizer/complexTypesSuite.scala | 4 ++-- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 50ca2494dda0..4fe63af4f7de 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -23,6 +23,7 @@ import java.util.{Calendar, Locale, TimeZone} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT @@ -846,23 +847,10 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(Literal.create(inputOuter, fromOuter), toOuter), outputOuter) } - test("SPARK-22570: should not create a lot of instance variables") { - val N = 30000 - - val from1 = new StructType( - (1 to N).map(i => StructField(s"s$i", StringType)).toArray) - val to1 = new StructType( - (1 to N).map(i => StructField(s"i$i", IntegerType)).toArray) - val input1 = Row.fromSeq((1 to N).map(i => i.toString)) - val output1 = Row.fromSeq((1 to N)) - checkEvaluation(cast(Literal.create(input1, from1), to1), output1) - - val from2 = new StructType( - (1 to N).map(i => StructField(s"s$i", StringType)).toArray) - val to2 = new StructType( - (1 to N).map(i => StructField(s"i$i", LongType)).toArray) - val input2 = Row.fromSeq((1 to N).map(i => i.toString)) - val output2 = Row.fromSeq((1 to N).map(i => i.toLong)) - checkEvaluation(cast(Literal.create(input2, from2), to2), output2) + test("SPARK-22570: Cast should not create a lot of instance variables") { + val ctx = new CodegenContext + cast("1", IntegerType).genCode(ctx).code + cast("2", LongType).genCode(ctx).code + assert(ctx.mutableStates.length == 0) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index c49ff6a22cc6..1b5a4380291e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -179,14 +179,12 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(nonNullExpr, "num-num", row1) } - test("SPARK-22570: should not create a lot of instance variables") { - val N = 16000 - val expr = RegExpReplace(Literal("100"), Literal("(\\d+)"), Literal("num")) + test("SPARK-22570: RegExpReplace should not create a lot of global variables") { val ctx = new CodegenContext - (1 to N).map(_ => expr.genCode(ctx).code) + RegExpReplace(Literal("100"), Literal("(\\d+)"), Literal("num")).genCode(ctx) // four global variables (lastRegex, pattern, lastReplacement, and lastReplacementInUTF8) // are always required - assert(ctx.mutableStates.length == 4 * N) + assert(ctx.mutableStates.length == 4) } test("RegexExtract") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 96235d9628b6..d5672ad138b5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -165,9 +165,9 @@ class ComplexTypesSuite extends PlanTest{ comparePlans(Optimizer execute query, expected) } - test("SPARK-22570: should not create a lot of instance variables") { + test("SPARK-22570: CreateArray should not create a lot of global variables") { val ctx = new CodegenContext - (1 to 60000).map(i => CreateArray(Seq(Literal(s"$i"))).genCode(ctx).code) + CreateArray(Seq(Literal(1))).genCode(ctx).code assert(ctx.mutableStates.length == 0) } From 747e215a06856ae8a8ed49a784ae1327ec703966 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 30 Nov 2017 14:34:24 +0000 Subject: [PATCH 12/12] address review comments --- .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 6 +++--- .../sql/catalyst/expressions/RegexpExpressionsSuite.scala | 4 ++-- .../spark/sql/catalyst/optimizer/complexTypesSuite.scala | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 4fe63af4f7de..65617be05a43 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -847,10 +847,10 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(Literal.create(inputOuter, fromOuter), toOuter), outputOuter) } - test("SPARK-22570: Cast should not create a lot of instance variables") { + test("SPARK-22570: Cast should not create a lot of global variables") { val ctx = new CodegenContext - cast("1", IntegerType).genCode(ctx).code - cast("2", LongType).genCode(ctx).code + cast("1", IntegerType).genCode(ctx) + cast("2", LongType).genCode(ctx) assert(ctx.mutableStates.length == 0) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 1b5a4380291e..4fa61fbaf66c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodegenContext, CodeGenerator} -import org.apache.spark.sql.types.{IntegerType, StringType} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.types.StringType /** * Unit tests for regular expression (regexp) related SQL expressions. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index d5672ad138b5..e3675367d78e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodegenContext, CodeGenerator} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -167,7 +167,7 @@ class ComplexTypesSuite extends PlanTest{ test("SPARK-22570: CreateArray should not create a lot of global variables") { val ctx = new CodegenContext - CreateArray(Seq(Literal(1))).genCode(ctx).code + CreateArray(Seq(Literal(1))).genCode(ctx) assert(ctx.mutableStates.length == 0) }