From bbcdeda98c14705b6de3efab70f2c58bc4539bb9 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Sat, 11 Feb 2017 15:46:34 +0800 Subject: [PATCH 1/2] [SPARK-19555][SQL] Improve the performance of StringUtils.escapeLikeRegex --- .../spark/sql/catalyst/util/StringUtils.scala | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index cde8bd5b9614..857fd1592237 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -17,31 +17,37 @@ package org.apache.spark.sql.catalyst.util +import java.lang.StringBuilder import java.util.regex.{Pattern, PatternSyntaxException} import org.apache.spark.unsafe.types.UTF8String object StringUtils { - // replace the _ with .{1} exactly match 1 time of any character - // replace the % with .*, match 0 or more times with any character def escapeLikeRegex(v: String): String = { if (!v.isEmpty) { - "(?s)" + (' ' +: v.init).zip(v).flatMap { - case (prev, '\\') => "" - case ('\\', c) => - c match { - case '_' => "_" - case '%' => "%" - case _ => Pattern.quote("\\" + c) - } - case (prev, c) => - c match { - case '_' => "." - case '%' => ".*" - case _ => Pattern.quote(Character.toString(c)) - } - }.mkString + val sb = new StringBuilder("(?s)") + var prev = ' ' + for (c <- v) { + val out = (prev, c) match { + case (prev, '\\') => "" + case ('\\', c) => + c match { + case '_' => "_" + case '%' => "%" + case _ => Pattern.quote("\\" + c) + } + case (prev, c) => + c match { + case '_' => "." + case '%' => ".*" + case _ => Pattern.quote(Character.toString(c)) + } + } + prev = c + sb.append(out) + } + sb.toString() } else { v } From e68eab0841bde02c773fb169191361f4eb55d742 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Sat, 11 Feb 2017 15:50:35 +0800 Subject: [PATCH 2/2] Add back the comments. --- .../scala/org/apache/spark/sql/catalyst/util/StringUtils.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 857fd1592237..a60f8fb5a00a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -24,6 +24,8 @@ import org.apache.spark.unsafe.types.UTF8String object StringUtils { + // replace the _ with .{1} exactly match 1 time of any character + // replace the % with .*, match 0 or more times with any character def escapeLikeRegex(v: String): String = { if (!v.isEmpty) { val sb = new StringBuilder("(?s)")