Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
d88555c
Factored in null count.
GideonPotok May 4, 2024
0f0eedf
fixed up tests according to expectations surrounding nulls.
GideonPotok May 7, 2024
e1a533f
mode benchmark
GideonPotok May 7, 2024
a98eebe
mode benchmark
GideonPotok May 7, 2024
e4bf907
remove class member for collation enabled
GideonPotok May 7, 2024
c89af54
remove class member for collation enabled
GideonPotok May 7, 2024
794b20a
remove class member for collation enabled
GideonPotok May 7, 2024
3e891df
dataType check can be incorporated into the previous test, so this te…
GideonPotok May 7, 2024
0849a21
dataType check can be incorporated into the previous test, so this te…
GideonPotok May 7, 2024
e79e14e
scalastyle
GideonPotok May 7, 2024
9a32243
scalastyle
GideonPotok May 7, 2024
506f7fc
fix add back ._1
GideonPotok May 8, 2024
16ed98f
fix up
GideonPotok May 8, 2024
6891c9b
tests pass
GideonPotok May 8, 2024
3a0edac
tests pass
GideonPotok May 8, 2024
abea836
test
GideonPotok May 8, 2024
7fc7561
import _
GideonPotok May 8, 2024
05fe1a9
added bm results
GideonPotok May 9, 2024
a5710d1
tests pass
GideonPotok May 10, 2024
76f089f
tests pass
GideonPotok May 10, 2024
d8ea771
tests pass
GideonPotok May 10, 2024
cc63899
Merge branch 'master' into spark_47353_3
GideonPotok May 10, 2024
af187ac
Update sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expr…
GideonPotok May 13, 2024
184317d
removed withCollatedString
GideonPotok May 13, 2024
cd74bc8
buff->buffer
GideonPotok May 13, 2024
8f2525a
added the jdk 17 benchmarks.
GideonPotok May 13, 2024
7aca2e3
better benchmark
GideonPotok May 13, 2024
81e2b44
better benchmark
GideonPotok May 13, 2024
dec21a5
better benchmark
GideonPotok May 13, 2024
045c007
Update sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expr…
GideonPotok May 14, 2024
d85c052
up to date benchmarks
GideonPotok May 14, 2024
af016f4
scalastyle
GideonPotok May 14, 2024
3fbe2b2
scalastyle
GideonPotok May 14, 2024
ab7fa8e
tests with higher unicode planes - corner cases
GideonPotok May 14, 2024
c86e01a
removed those unicode tests for now at least
GideonPotok May 14, 2024
08a3e0a
removed extra benchmarks
GideonPotok May 14, 2024
6ff346d
Merge branch 'master' into spark_47353_3
GideonPotok May 14, 2024
d3911cb
move tests to CollationSQLExpressionsSuite
GideonPotok May 14, 2024
904b9c5
undo inadvertent change
GideonPotok May 14, 2024
4ae4534
imports
GideonPotok May 14, 2024
0fac136
Add back old benchmark logic, too.
GideonPotok May 14, 2024
9b60a31
Update sql/core/src/test/scala/org/apache/spark/sql/CollationStringEx…
GideonPotok May 14, 2024
36f38e3
wip
GideonPotok May 15, 2024
0f2a456
wip
GideonPotok May 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
better benchmark
  • Loading branch information
GideonPotok committed May 13, 2024
commit dec21a503050ebef59e2ca252cca4671be5e4d56
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
package org.apache.spark.sql

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.expressions.aggregate.Mode
import org.apache.spark.sql.catalyst.expressions.{ExpressionEvalHelper, Literal, StringTrim, StringTrimLeft, StringTrimRight}
import org.apache.spark.sql.catalyst.expressions.aggregate.Mode
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
Expand Down Expand Up @@ -880,7 +880,7 @@ class CollationStringExpressionsSuite
assert(myMode.eval(buffer).toString.toLowerCase() == t.result.toLowerCase())
})
}

test("StringTrim* functions - unit tests for both paths (codegen and eval)") {
// Without trimString param.
checkEvaluation(StringTrim(Literal.create( " asd ", StringType("UTF8_BINARY"))), "asd")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.aggregate.Mode
import org.apache.spark.sql.catalyst.util.{CollationFactory, CollationSupport}
import org.apache.spark.sql.types.{IntegerType, StringType}
import org.apache.spark.sql.types.StringType
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.collection.OpenHashMap

Expand Down Expand Up @@ -249,7 +249,7 @@ object CollationBenchmark extends CollationBenchmarkBase {
Lowercase and some repeated strings to test the performance of the collation functions.
*/
def generateBaseInputStringswithUniqueGroupNumber(n: Long): Seq[UTF8String] = {
(0 to n / baseInputStrings.size).flatMap(k => baseInputStrings.map(
(0 to n.toInt / baseInputStrings.size).flatMap(k => baseInputStrings.map(
x => UTF8String.fromString(x + "_" + k)))
.flatMap(
x => Seq(x, x.repeat(4), x.repeat(8))) // Variable Lengths...
Expand Down