Skip to content

Commit 47c7a55

Browse files
committed
improvement
1 parent 48f39b8 commit 47c7a55

File tree

8 files changed

+161
-5
lines changed

8 files changed

+161
-5
lines changed

common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
102102

103103
private static final UTF8String COMMA_UTF8 = UTF8String.fromString(",");
104104
public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
105+
public static final UTF8String ZERO_UTF8 = UTF8String.fromString("0");
106+
105107

106108
/**
107109
* Creates an UTF8String from byte array, which should be encoded in UTF-8.
@@ -1867,4 +1869,18 @@ public void read(Kryo kryo, Input in) {
18671869
in.read((byte[]) base);
18681870
}
18691871

1872+
public static UTF8String toBinaryString(long val) {
1873+
int zeros = Long.numberOfLeadingZeros(val);
1874+
if (zeros == Long.SIZE) {
1875+
return UTF8String.ZERO_UTF8;
1876+
} else {
1877+
int length = Math.max(Long.SIZE - Long.numberOfLeadingZeros(val), 1);
1878+
byte[] buf = new byte[length];
1879+
do {
1880+
buf[--length] = (byte) ((val & 0x1) == 1 ? '1': '0');
1881+
val >>>= 1;
1882+
} while(length > 0);
1883+
return fromBytes(buf);
1884+
}
1885+
}
18701886
}

common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,4 +1110,21 @@ public void isValid() {
11101110
testIsValid("0x9C 0x76 0x17", "0xEF 0xBF 0xBD 0x76 0x17");
11111111
}
11121112

1113+
@Test
1114+
public void toBinaryString() {
1115+
assertEquals(ZERO_UTF8, UTF8String.toBinaryString(0));
1116+
assertEquals(UTF8String.fromString("1"), UTF8String.toBinaryString(1));
1117+
assertEquals(UTF8String.fromString("10"), UTF8String.toBinaryString(2));
1118+
assertEquals(UTF8String.fromString("100"), UTF8String.toBinaryString(4));
1119+
assertEquals(UTF8String.fromString("111"), UTF8String.toBinaryString(7));
1120+
assertEquals(
1121+
UTF8String.fromString("1111111111111111111111111111111111111111111111111111111111110011"),
1122+
UTF8String.toBinaryString(-13));
1123+
assertEquals(
1124+
UTF8String.fromString("1000000000000000000000000000000000000000000000000000000000000000"),
1125+
UTF8String.toBinaryString(Long.MIN_VALUE));
1126+
assertEquals(
1127+
UTF8String.fromString("111111111111111111111111111111111111111111111111111111111111111"),
1128+
UTF8String.toBinaryString(Long.MAX_VALUE));
1129+
}
11131130
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,11 +1008,10 @@ case class Bin(child: Expression)
10081008
override def dataType: DataType = SQLConf.get.defaultStringType
10091009

10101010
protected override def nullSafeEval(input: Any): Any =
1011-
UTF8String.fromString(jl.Long.toBinaryString(input.asInstanceOf[Long]))
1011+
UTF8String.toBinaryString(input.asInstanceOf[Long])
10121012

10131013
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
1014-
defineCodeGen(ctx, ev, (c) =>
1015-
s"UTF8String.fromString(java.lang.Long.toBinaryString($c))")
1014+
defineCodeGen(ctx, ev, c => s"UTF8String.toBinaryString($c)")
10161015
}
10171016

10181017
override protected def withNewChildInternal(newChild: Expression): Bin = copy(child = newChild)
@@ -1021,7 +1020,6 @@ case class Bin(child: Expression)
10211020
object Hex {
10221021
private final val hexDigits =
10231022
Array[Byte]('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F')
1024-
private final val ZERO_UTF8 = UTF8String.fromBytes(Array[Byte]('0'))
10251023

10261024
// lookup table to translate '0' -> 0 ... 'F'/'f' -> 15
10271025
val unhexDigits = {
@@ -1053,7 +1051,7 @@ object Hex {
10531051

10541052
def hex(num: Long): UTF8String = {
10551053
val zeros = jl.Long.numberOfLeadingZeros(num)
1056-
if (zeros == jl.Long.SIZE) return ZERO_UTF8
1054+
if (zeros == jl.Long.SIZE) return UTF8String.ZERO_UTF8
10571055
val len = (jl.Long.SIZE - zeros + 3) / 4
10581056
var numBuf = num
10591057
val value = new Array[Byte](len)

sql/core/src/test/resources/sql-tests/analyzer-results/ansi/math.sql.out

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,3 +431,31 @@ SELECT conv('-9223372036854775807', 36, 10)
431431
-- !query analysis
432432
Project [conv(-9223372036854775807, 36, 10, true) AS conv(-9223372036854775807, 36, 10)#x]
433433
+- OneRowRelation
434+
435+
436+
-- !query
437+
SELECT BIN(0)
438+
-- !query analysis
439+
Project [bin(cast(0 as bigint)) AS bin(0)#x]
440+
+- OneRowRelation
441+
442+
443+
-- !query
444+
SELECT BIN(25)
445+
-- !query analysis
446+
Project [bin(cast(25 as bigint)) AS bin(25)#x]
447+
+- OneRowRelation
448+
449+
450+
-- !query
451+
SELECT BIN(25L)
452+
-- !query analysis
453+
Project [bin(25) AS bin(25)#x]
454+
+- OneRowRelation
455+
456+
457+
-- !query
458+
SELECT BIN(25.5)
459+
-- !query analysis
460+
Project [bin(cast(25.5 as bigint)) AS bin(25.5)#x]
461+
+- OneRowRelation

sql/core/src/test/resources/sql-tests/analyzer-results/math.sql.out

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,3 +431,31 @@ SELECT conv('-9223372036854775807', 36, 10)
431431
-- !query analysis
432432
Project [conv(-9223372036854775807, 36, 10, false) AS conv(-9223372036854775807, 36, 10)#x]
433433
+- OneRowRelation
434+
435+
436+
-- !query
437+
SELECT BIN(0)
438+
-- !query analysis
439+
Project [bin(cast(0 as bigint)) AS bin(0)#x]
440+
+- OneRowRelation
441+
442+
443+
-- !query
444+
SELECT BIN(25)
445+
-- !query analysis
446+
Project [bin(cast(25 as bigint)) AS bin(25)#x]
447+
+- OneRowRelation
448+
449+
450+
-- !query
451+
SELECT BIN(25L)
452+
-- !query analysis
453+
Project [bin(25) AS bin(25)#x]
454+
+- OneRowRelation
455+
456+
457+
-- !query
458+
SELECT BIN(25.5)
459+
-- !query analysis
460+
Project [bin(cast(25.5 as bigint)) AS bin(25.5)#x]
461+
+- OneRowRelation

sql/core/src/test/resources/sql-tests/inputs/math.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,8 @@ SELECT conv('9223372036854775808', 10, 16);
7777
SELECT conv('92233720368547758070', 10, 16);
7878
SELECT conv('9223372036854775807', 36, 10);
7979
SELECT conv('-9223372036854775807', 36, 10);
80+
81+
SELECT BIN(0);
82+
SELECT BIN(25);
83+
SELECT BIN(25L);
84+
SELECT BIN(25.5);

sql/core/src/test/resources/sql-tests/results/ansi/math.sql.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,3 +797,35 @@ org.apache.spark.SparkArithmeticException
797797
"fragment" : "conv('-9223372036854775807', 36, 10)"
798798
} ]
799799
}
800+
801+
802+
-- !query
803+
SELECT BIN(0)
804+
-- !query schema
805+
struct<bin(0):string>
806+
-- !query output
807+
0
808+
809+
810+
-- !query
811+
SELECT BIN(25)
812+
-- !query schema
813+
struct<bin(25):string>
814+
-- !query output
815+
11001
816+
817+
818+
-- !query
819+
SELECT BIN(25L)
820+
-- !query schema
821+
struct<bin(25):string>
822+
-- !query output
823+
11001
824+
825+
826+
-- !query
827+
SELECT BIN(25.5)
828+
-- !query schema
829+
struct<bin(25.5):string>
830+
-- !query output
831+
11001

sql/core/src/test/resources/sql-tests/results/math.sql.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,3 +493,35 @@ SELECT conv('-9223372036854775807', 36, 10)
493493
struct<conv(-9223372036854775807, 36, 10):string>
494494
-- !query output
495495
18446744073709551615
496+
497+
498+
-- !query
499+
SELECT BIN(0)
500+
-- !query schema
501+
struct<bin(0):string>
502+
-- !query output
503+
0
504+
505+
506+
-- !query
507+
SELECT BIN(25)
508+
-- !query schema
509+
struct<bin(25):string>
510+
-- !query output
511+
11001
512+
513+
514+
-- !query
515+
SELECT BIN(25L)
516+
-- !query schema
517+
struct<bin(25):string>
518+
-- !query output
519+
11001
520+
521+
522+
-- !query
523+
SELECT BIN(25.5)
524+
-- !query schema
525+
struct<bin(25.5):string>
526+
-- !query output
527+
11001

0 commit comments

Comments
 (0)