Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,10 @@ private CollationSpecUTF8(

private static int collationNameToId(String originalName, String collationName)
throws SparkException {
// Have a check for UTF8_BINARY collation to early-out and not introduce any regression.
if (UTF8_BINARY_COLLATION.collationName.equals(collationName)) {
return UTF8_BINARY_COLLATION_ID;
}

int baseId;
String collationNamePrefix;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,13 @@ abstract class CollationBenchmarkBase extends BenchmarkBase {
warmupTime = 10.seconds,
output = output)
collationTypes.foreach { collationType => {
val collation = CollationFactory.fetchCollation(collationType)
val collationId = CollationFactory.collationNameToId(collationType)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid touching the benchmark in the PR, @stevomitric ?

If we need this, we can land this CollationBenchmark change separately, before your regression fix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regression was partially due to the benchmarks as well. The CollationFactory.collationNameToId function should only be called a fixed number of times per query, which shouldn't measurably impact the performance of the query.

So I think this is the right PR for it, since it was the leading contributor to the numbers seen in the results.

benchmark.addCase(s"$collationType") { _ =>
sublistStrings.foreach { s1 =>
utf8Strings.foreach { s =>
(0 to 3).foreach { _ =>
CollationSupport.Contains.exec(
s, s1, CollationFactory.collationNameToId(collation.collationName)
s, s1, collationId
)
}
}
Expand All @@ -141,13 +141,13 @@ abstract class CollationBenchmarkBase extends BenchmarkBase {
warmupTime = 10.seconds,
output = output)
collationTypes.foreach { collationType => {
val collation = CollationFactory.fetchCollation(collationType)
val collationId = CollationFactory.collationNameToId(collationType)
benchmark.addCase(s"$collationType") { _ =>
sublistStrings.foreach { s1 =>
utf8Strings.foreach { s =>
(0 to 3).foreach { _ =>
CollationSupport.StartsWith.exec(
s, s1, CollationFactory.collationNameToId(collation.collationName)
s, s1, collationId
)
}
}
Expand All @@ -169,13 +169,13 @@ abstract class CollationBenchmarkBase extends BenchmarkBase {
warmupTime = 10.seconds,
output = output)
collationTypes.foreach { collationType => {
val collation = CollationFactory.fetchCollation(collationType)
val collationId = CollationFactory.collationNameToId(collationType)
benchmark.addCase(s"$collationType") { _ =>
sublistStrings.foreach { s1 =>
utf8Strings.foreach { s =>
(0 to 3).foreach { _ =>
CollationSupport.EndsWith.exec(
s, s1, CollationFactory.collationNameToId(collation.collationName)
s, s1, collationId
)
}
}
Expand Down