Skip to content
Prev Previous commit
Next Next commit
add benchmarks
  • Loading branch information
stevomitric committed Nov 11, 2024
commit 2951355e92523e96021db3826d922d90396c418f
Original file line number Diff line number Diff line change
Expand Up @@ -509,18 +509,21 @@ private CollationSpecUTF8(

private static int collationNameToId(String originalName, String collationName)
throws SparkException {
// Have a check for UTF8_BINARY collation to early-out and not introduce any regression.
if (UTF8_BINARY_COLLATION.collationName.equals(collationName)) {
return UTF8_BINARY_COLLATION_ID;
}

int baseId;
String collationNamePrefix;

if (collationName.startsWith(UTF8_BINARY_COLLATION.collationName)) {
// Have a check for UTF8_BINARY collation to early-out and not introduce any regression.
if (collationName.length() == UTF8_BINARY_COLLATION.collationName.length()) {
return UTF8_BINARY_COLLATION_ID;
}
baseId = UTF8_BINARY_COLLATION_ID;
collationNamePrefix = UTF8_BINARY_COLLATION.collationName;
} else if (collationName.startsWith(UTF8_LCASE_COLLATION.collationName)) {
if (collationName.length() == UTF8_LCASE_COLLATION.collationName.length()) {
return UTF8_LCASE_COLLATION_ID;
}
baseId = UTF8_LCASE_COLLATION_ID;
collationNamePrefix = UTF8_LCASE_COLLATION.collationName;
} else {
Expand Down
48 changes: 24 additions & 24 deletions sql/core/benchmarks/CollationBenchmark-jdk21-results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,53 @@ OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
--------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 1349 1349 0 0.1 13485.4 1.0X
UTF8_LCASE 3559 3561 3 0.0 35594.3 2.6X
UNICODE 17580 17589 12 0.0 175803.6 13.0X
UNICODE_CI 17210 17212 2 0.0 172100.2 12.8X
UTF8_BINARY 1351 1352 1 0.1 13514.7 1.0X
UTF8_LCASE 2607 2607 1 0.0 26071.0 1.9X
UNICODE 16866 16883 25 0.0 168655.0 12.5X
UNICODE_CI 16656 16665 14 0.0 166557.4 12.3X

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
---------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 1740 1741 1 0.1 17398.8 1.0X
UTF8_LCASE 2630 2632 3 0.0 26301.0 1.5X
UNICODE 16732 16743 16 0.0 167319.7 9.6X
UNICODE_CI 16482 16492 14 0.0 164819.7 9.5X
UTF8_BINARY 1770 1792 31 0.1 17699.6 1.0X
UTF8_LCASE 2636 2637 2 0.0 26359.6 1.5X
UNICODE 16778 16793 21 0.0 167782.6 9.5X
UNICODE_CI 16548 16573 35 0.0 165480.3 9.3X

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 2808 2808 0 0.0 28082.3 1.0X
UTF8_LCASE 5412 5413 1 0.0 54123.5 1.9X
UNICODE 70755 70787 44 0.0 707553.4 25.2X
UNICODE_CI 57639 57669 43 0.0 576390.0 20.5X
UTF8_BINARY 2823 2825 2 0.0 28234.5 1.0X
UTF8_LCASE 5450 5453 4 0.0 54504.2 1.9X
UNICODE 67704 67710 8 0.0 677043.1 24.0X
UNICODE_CI 53588 53597 12 0.0 535883.0 19.0X

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 9356 9357 0 0.0 93564.9 1.0X
UTF8_LCASE 24106 24129 33 0.0 241055.3 2.6X
UNICODE 368428 369053 883 0.0 3684284.1 39.4X
UNICODE_CI 417361 418242 1246 0.0 4173613.9 44.6X
UTF8_BINARY 1669 1670 0 0.1 16693.1 1.0X
UTF8_LCASE 16580 16605 35 0.0 165803.5 9.9X
UNICODE 306000 306150 212 0.0 3060004.1 183.3X
UNICODE_CI 310879 311340 651 0.0 3108793.2 186.2X

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 10941 10943 2 0.0 109411.5 1.0X
UTF8_LCASE 20041 20058 24 0.0 200410.1 1.8X
UNICODE 364296 365610 1859 0.0 3642958.8 33.3X
UNICODE_CI 424306 424888 823 0.0 4243062.7 38.8X
UTF8_BINARY 1971 1977 8 0.1 19713.4 1.0X
UTF8_LCASE 10314 10322 11 0.0 103140.2 5.2X
UNICODE 308019 308139 170 0.0 3080189.8 156.2X
UNICODE_CI 311860 312153 415 0.0 3118597.4 158.2X

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 10551 10556 7 0.0 105511.7 1.0X
UTF8_LCASE 20294 20300 9 0.0 202943.7 1.9X
UNICODE 384070 384554 684 0.0 3840704.6 36.4X
UNICODE_CI 441935 442184 352 0.0 4419351.4 41.9X
UTF8_BINARY 2045 2047 4 0.0 20446.1 1.0X
UTF8_LCASE 10726 10742 22 0.0 107257.0 5.2X
UNICODE 321474 322725 1769 0.0 3214741.8 157.2X
UNICODE_CI 329792 330077 404 0.0 3297918.1 161.3X

48 changes: 24 additions & 24 deletions sql/core/benchmarks/CollationBenchmark-results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,53 @@ OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
--------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 1372 1372 1 0.1 13715.2 1.0X
UTF8_LCASE 3847 3851 6 0.0 38467.3 2.8X
UNICODE 19659 19662 4 0.0 196587.1 14.3X
UNICODE_CI 19663 19666 3 0.0 196634.5 14.3X
UTF8_BINARY 1372 1372 0 0.1 13722.7 1.0X
UTF8_LCASE 3303 3306 4 0.0 33030.7 2.4X
UNICODE 19028 19028 1 0.0 190276.4 13.9X
UNICODE_CI 18836 18861 36 0.0 188358.6 13.7X

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
---------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 1706 1707 3 0.1 17056.0 1.0X
UTF8_LCASE 4016 4016 0 0.0 40164.0 2.4X
UNICODE 19545 19547 3 0.0 195453.4 11.5X
UNICODE_CI 19544 19547 5 0.0 195437.5 11.5X
UTF8_BINARY 1733 1744 14 0.1 17334.2 1.0X
UTF8_LCASE 4042 4060 25 0.0 40422.5 2.3X
UNICODE 19772 19785 19 0.0 197718.0 11.4X
UNICODE_CI 19706 19723 25 0.0 197056.4 11.4X

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 3091 3092 1 0.0 30909.8 1.0X
UTF8_LCASE 6286 6287 2 0.0 62856.0 2.0X
UNICODE 65495 65528 47 0.0 654945.7 21.2X
UNICODE_CI 59987 59994 10 0.0 599868.6 19.4X
UTF8_BINARY 3084 3084 1 0.0 30836.0 1.0X
UTF8_LCASE 6358 6362 7 0.0 63577.1 2.1X
UNICODE 71671 71724 76 0.0 716708.2 23.2X
UNICODE_CI 55397 55435 54 0.0 553968.2 18.0X

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 13707 13726 27 0.0 137069.4 1.0X
UTF8_LCASE 28660 28685 36 0.0 286598.9 2.1X
UNICODE 363134 364168 1462 0.0 3631341.3 26.5X
UNICODE_CI 412158 412229 100 0.0 4121577.8 30.1X
UTF8_BINARY 1672 1672 0 0.1 16723.6 1.0X
UTF8_LCASE 17354 17355 2 0.0 173539.6 10.4X
UNICODE 306011 306025 19 0.0 3060113.1 183.0X
UNICODE_CI 299999 301017 1439 0.0 2999992.9 179.4X

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 12200 12205 8 0.0 121998.8 1.0X
UTF8_LCASE 27626 27633 9 0.0 276263.6 2.3X
UNICODE 350755 351083 464 0.0 3507553.8 28.8X
UNICODE_CI 409383 410380 1410 0.0 4093834.8 33.6X
UTF8_BINARY 2094 2094 0 0.0 20941.9 1.0X
UTF8_LCASE 17209 17212 5 0.0 172085.6 8.2X
UNICODE 296666 297034 520 0.0 2966663.8 141.7X
UNICODE_CI 301549 301712 230 0.0 3015490.3 144.0X

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
AMD EPYC 7763 64-Core Processor
collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
UTF8_BINARY 11879 11887 12 0.0 118786.3 1.0X
UTF8_LCASE 27743 27759 22 0.0 277434.4 2.3X
UNICODE 368435 368478 61 0.0 3684351.2 31.0X
UNICODE_CI 426350 426503 216 0.0 4263497.6 35.9X
UTF8_BINARY 2035 2036 2 0.0 20345.4 1.0X
UTF8_LCASE 17219 17223 5 0.0 172193.4 8.5X
UNICODE 312845 312940 134 0.0 3128445.2 153.8X
UNICODE_CI 314346 314501 220 0.0 3143456.6 154.5X