Skip to content
Closed
Prev Previous commit
Next Next commit
initial commit for regexpreplace
  • Loading branch information
kiszk committed Nov 29, 2017
commit de5cd384b81e94a320f4a9c64a3263c6beec368c
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
val termLastReplacement = ctx.freshName("lastReplacement")
val termLastReplacementInUTF8 = ctx.freshName("lastReplacementInUTF8")

val termResult = ctx.freshName("result")
val termResult = "termResult"

val classNamePattern = classOf[Pattern].getCanonicalName
val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName
Expand All @@ -334,8 +334,10 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;")
ctx.addMutableState("UTF8String",
termLastReplacementInUTF8, s"${termLastReplacementInUTF8} = null;")
ctx.addMutableState(classNameStringBuffer,
termResult, s"${termResult} = new $classNameStringBuffer();")
if (!ctx.mutableStates.exists(s => s._1 == termResult)) {
ctx.addMutableState(classNameStringBuffer,
termResult, s"${termResult} = new $classNameStringBuffer();")
}

val setEvNotNull = if (nullable) {
s"${ev.isNull} = false;"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeGenerator, CodegenContext}
import org.apache.spark.sql.types.{IntegerType, StringType}

/**
Expand Down Expand Up @@ -178,6 +179,29 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(nonNullExpr, "num-num", row1)
}

// Per its name, this test guards SPARK-22570: it generates code for one RegExpReplace
// expression many times against a single CodegenContext and compiles the resulting class.
// There is no explicit assertion; the test passes only if nothing below throws.
test("SPARK-22570: should not create a lot of instance variables") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: RegExpReplace should not create a lot of global variables

val expr = RegExpReplace(Literal("100"), Literal("(\\d+)"), Literal("num"))
val ctx = new CodegenContext
// Run genCode for the same expression 16000 times, all against the one shared ctx.
val codes = (1 to 16000).map(_ => expr.genCode(ctx).code)
// Combine the generated snippets into the body of apply().
// NOTE(review): presumably splitExpressions registers helper functions on ctx, which is why
// declareAddedFunctions() is emitted in the class below — confirm against CodegenContext.
val eval = ctx.splitExpressions(ctx.INPUT_ROW, codes)
// Java source for a wrapper class that declares and initializes every mutable state
// recorded in ctx and runs the combined snippets in apply().
val codeBody = s"""
public RegexpExpressionsTest generate(Object[] references) {
return new RegexpExpressionsTest(references);
}
class RegexpExpressionsTest {
Object[] references;
${ctx.declareMutableStates()}
public RegexpExpressionsTest(Object[] references) {
${ctx.initMutableStates()}
}
public void apply(InternalRow ${ctx.INPUT_ROW}) {
${eval}
}
${ctx.declareAddedFunctions()}
}"""
// Compile the assembled source; the compiled result is discarded.
CodeGenerator.compile(new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
}

test("RegexExtract") {
val row1 = create_row("100-200", "(\\d+)-(\\d+)", 1)
val row2 = create_row("100-200", "(\\d+)-(\\d+)", 2)
Expand Down