Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import java.io.Writer
import com.univocity.parsers.csv.CsvWriter

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, IntervalStringStyles, IntervalUtils, SparkStringUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, ToStringBase}
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, IntervalStringStyles, IntervalUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
Expand Down Expand Up @@ -65,9 +65,11 @@ class UnivocityGenerator(
private val nullAsQuotedEmptyString =
SQLConf.get.getConf(SQLConf.LEGACY_NULL_VALUE_WRITTEN_AS_QUOTED_EMPTY_STRING_CSV)

private val binaryFormatter = ToStringBase.getBinaryFormatter

private def makeConverter(dataType: DataType): ValueConverter = dataType match {
case BinaryType =>
(getter, ordinal) => SparkStringUtils.getHexString(getter.getBinary(ordinal))
(getter, ordinal) => binaryFormatter(getter.getBinary(ordinal)).toString

case DateType =>
(getter, ordinal) => dateFormatter.format(getter.getInt(ordinal))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2790,7 +2790,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
case BINARY_HEX =>
val padding = if (value.length % 2 != 0) "0" else ""
try {
Literal(Hex.decodeHex(padding + value))
Literal(Hex.decodeHex(padding + value), BinaryType)
} catch {
case e: DecoderException =>
val ex = QueryParsingErrors.cannotParseValueTypeError("X", value, ctx)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,10 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
-- !query analysis
Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x]
+- OneRowRelation


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query analysis
Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,10 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
-- !query analysis
Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x]
+- OneRowRelation


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query analysis
Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,10 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
-- !query analysis
Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x]
+- OneRowRelation


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query analysis
Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,10 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
-- !query analysis
Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x]
+- OneRowRelation


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query analysis
Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- Automatically generated by SQLQueryTestSuite
-- !query
SELECT X''
-- !query analysis
Project [0x AS X''#x]
+- OneRowRelation


-- !query
SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
-- !query analysis
Project [0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 AS X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'#x]
+- OneRowRelation


-- !query
SELECT CAST('Spark' as BINARY)
-- !query analysis
Project [cast(Spark as binary) AS CAST(Spark AS BINARY)#x]
+- OneRowRelation


-- !query
SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY))
-- !query analysis
Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x]
+- OneRowRelation


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query analysis
Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
1 change: 1 addition & 0 deletions sql/core/src/test/resources/sql-tests/inputs/binary.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ SELECT X'';
SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333';
SELECT CAST('Spark' as BINARY);
SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY));
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'));
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--IMPORT binary.sql

--SET spark.sql.binaryOutputStyle=HEX_DISCRETE;
8 changes: 8 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/binary.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
struct<array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY)):array<binary>>
-- !query output
[,Eason Yao 2018-11-17:13:33:33,Spark]


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query schema
struct<to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,Eason Yao 2018-11-17:13:33:33
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
struct<array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY)):array<binary>>
-- !query output
[,RWFzb24gWWFvIDIwMTgtMTEtMTc6MTM6MzM6MzM,U3Bhcms]


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query schema
struct<to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,RWFzb24gWWFvIDIwMTgtMTEtMTc6MTM6MzM6MzM
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
struct<array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY)):array<binary>>
-- !query output
[[],[69, 97, 115, 111, 110, 32, 89, 97, 111, 32, 50, 48, 49, 56, 45, 49, 49, 45, 49, 55, 58, 49, 51, 58, 51, 51, 58, 51, 51],[83, 112, 97, 114, 107]]


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query schema
struct<to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,"[69, 97, 115, 111, 110, 32, 89, 97, 111, 32, 50, 48, 49, 56, 45, 49, 49, 45, 49, 55, 58, 49, 51, 58, 51, 51, 58, 51, 51]"
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
struct<array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY)):array<binary>>
-- !query output
[,4561736F6E2059616F20323031382D31312D31373A31333A33333A3333,537061726B]


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query schema
struct<to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,4561736F6E2059616F20323031382D31312D31373A31333A33333A3333
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
-- Automatically generated by SQLQueryTestSuite
-- !query
SELECT X''
-- !query schema
struct<X'':binary>
-- !query output
[]


-- !query
SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'
-- !query schema
struct<X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333':binary>
-- !query output
[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33 33 3A 33 33]


-- !query
SELECT CAST('Spark' as BINARY)
-- !query schema
struct<CAST(Spark AS BINARY):binary>
-- !query output
[53 70 61 72 6B]


-- !query
SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY))
-- !query schema
struct<array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY)):array<binary>>
-- !query output
[[],[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33 33 3A 33 33],[53 70 61 72 6B]]


-- !query
SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))
-- !query schema
struct<to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33 33 3A 33 33]
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ
// SPARK-47264
"collations.sql",
"binary_hex.sql",
"binary_hex_discrete.sql",
"binary_basic.sql",
"binary_base64.sql"
)
Expand Down