@@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.HiveResult
import org.apache.spark.sql.execution.command.SetCommand
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.{Utils => SparkUtils}

private[hive] class SparkExecuteStatementOperation(
@@ -104,6 +105,8 @@ private[hive] class SparkExecuteStatementOperation(
to += from.getAs[Timestamp](ordinal)
case BinaryType =>
to += from.getAs[Array[Byte]](ordinal)
case CalendarIntervalType =>
to += HiveResult.toHiveString((from.getAs[CalendarInterval](ordinal), CalendarIntervalType))
case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] =>
val hiveString = HiveResult.toHiveString((from.get(ordinal), dataTypes(ordinal)))
to += hiveString
@@ -308,7 +311,11 @@ private[hive] class SparkExecuteStatementOperation(
object SparkExecuteStatementOperation {
def getTableSchema(structType: StructType): TableSchema = {
val schema = structType.map { field =>
- val attrTypeString = if (field.dataType == NullType) "void" else field.dataType.catalogString
+ val attrTypeString = field.dataType match {
+   case NullType => "void"
+   case CalendarIntervalType => StringType.catalogString
+   case other => other.catalogString
Contributor
Not related to interval itself, but shouldn't it also return "string" for the _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] case here?

Member Author
This is because Hive's Type enum does not have a mapping for our CalendarIntervalType:

public enum Type {
NULL_TYPE("VOID",
java.sql.Types.NULL,
TTypeId.NULL_TYPE),
BOOLEAN_TYPE("BOOLEAN",
java.sql.Types.BOOLEAN,
TTypeId.BOOLEAN_TYPE),
TINYINT_TYPE("TINYINT",
java.sql.Types.TINYINT,
TTypeId.TINYINT_TYPE),
SMALLINT_TYPE("SMALLINT",
java.sql.Types.SMALLINT,
TTypeId.SMALLINT_TYPE),
INT_TYPE("INT",
java.sql.Types.INTEGER,
TTypeId.INT_TYPE),
BIGINT_TYPE("BIGINT",
java.sql.Types.BIGINT,
TTypeId.BIGINT_TYPE),
FLOAT_TYPE("FLOAT",
java.sql.Types.FLOAT,
TTypeId.FLOAT_TYPE),
DOUBLE_TYPE("DOUBLE",
java.sql.Types.DOUBLE,
TTypeId.DOUBLE_TYPE),
STRING_TYPE("STRING",
java.sql.Types.VARCHAR,
TTypeId.STRING_TYPE),
CHAR_TYPE("CHAR",
java.sql.Types.CHAR,
TTypeId.CHAR_TYPE,
true, false, false),
VARCHAR_TYPE("VARCHAR",
java.sql.Types.VARCHAR,
TTypeId.VARCHAR_TYPE,
true, false, false),
DATE_TYPE("DATE",
java.sql.Types.DATE,
TTypeId.DATE_TYPE),
TIMESTAMP_TYPE("TIMESTAMP",
java.sql.Types.TIMESTAMP,
TTypeId.TIMESTAMP_TYPE),
INTERVAL_YEAR_MONTH_TYPE("INTERVAL_YEAR_MONTH",
java.sql.Types.OTHER,
TTypeId.INTERVAL_YEAR_MONTH_TYPE),
INTERVAL_DAY_TIME_TYPE("INTERVAL_DAY_TIME",
java.sql.Types.OTHER,
TTypeId.INTERVAL_DAY_TIME_TYPE),
BINARY_TYPE("BINARY",
java.sql.Types.BINARY,
TTypeId.BINARY_TYPE),
DECIMAL_TYPE("DECIMAL",
java.sql.Types.DECIMAL,
TTypeId.DECIMAL_TYPE,
true, false, false),
ARRAY_TYPE("ARRAY",
java.sql.Types.ARRAY,
TTypeId.ARRAY_TYPE,
true, true),
MAP_TYPE("MAP",
java.sql.Types.JAVA_OBJECT,
TTypeId.MAP_TYPE,
true, true),
STRUCT_TYPE("STRUCT",
java.sql.Types.STRUCT,
TTypeId.STRUCT_TYPE,
true, false),
UNION_TYPE("UNIONTYPE",
java.sql.Types.OTHER,
TTypeId.UNION_TYPE,
true, false),
USER_DEFINED_TYPE("USER_DEFINED",
java.sql.Types.OTHER,
TTypeId.USER_DEFINED_TYPE,
true, false);

Otherwise:

Caused by: java.lang.IllegalArgumentException: Unrecognized type name: interval
	at org.apache.hive.service.cli.Type.getType(Type.java:169)
	at org.apache.hive.service.cli.TypeDescriptor.<init>(TypeDescriptor.java:53)
	at org.apache.hive.service.cli.ColumnDescriptor.<init>(ColumnDescriptor.java:53)
	at org.apache.hive.service.cli.TableSchema.<init>(TableSchema.java:52)
	at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$.getTableSchema(SparkExecuteStatementOperation.scala:321)
	at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.resultSchema$lzycompute(SparkExecuteStatementOperation.scala:70)
	at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.resultSchema(SparkExecuteStatementOperation.scala:65)
	at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.getResultSetSchema(SparkExecuteStatementOperation.scala:161)
	at org.apache.hive.service.cli.operation.OperationManager.getOperationResultSetSchema(OperationManager.java:209)
	at org.apache.hive.service.cli.session.HiveSessionImpl.getResultSetMetadata(HiveSessionImpl.java:773)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
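To make the failure concrete, here is a minimal sketch of my own (assuming hive-service's cli classes are on the classpath, and that Type.getType resolves type names against the enum above, as the stack trace suggests):

import org.apache.hive.service.cli.Type

object TypeLookupSketch extends App {
  // Names that appear in the enum resolve fine.
  println(Type.getType("string")) // STRING_TYPE
  try {
    // "interval" has no enum entry, so the lookup throws.
    Type.getType("interval")
  } catch {
    case e: IllegalArgumentException =>
      println(e.getMessage) // Unrecognized type name: interval
  }
}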

Contributor

But the query actually returns a string for arrays, maps, structs, etc. Should it tell the client that it returns an ARRAY_TYPE, MAP_TYPE, or STRUCT_TYPE when in reality it returns a string?

Member Author
@wangyum Sep 3, 2019

I don't think we need to return "string" for _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_], because that would not change the actual return type. This is just a workaround for the interval type:

0: jdbc:hive2://localhost:10000> SELECT interval '1' year '2' day AS i;
+--------------------------+--+
|            i             |
+--------------------------+--+
| interval 1 years 2 days  |
+--------------------------+--+
1 row selected (0.032 seconds)
0: jdbc:hive2://localhost:10000> DESC SELECT interval '1' year '2' day AS i;
+-----------+------------+----------+--+
| col_name  | data_type  | comment  |
+-----------+------------+----------+--+
| i         | interval   | NULL     |
+-----------+------------+----------+--+
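To illustrate the workaround, a small sketch of what the new mapping in getTableSchema produces for such a column (the one-column schema here is made up for the example):

import org.apache.spark.sql.types._

// Schema of "SELECT interval '1' year '2' day AS i".
val schema = StructType(Seq(StructField("i", CalendarIntervalType)))

val attrTypeStrings = schema.map { field =>
  field.dataType match {
    case NullType => "void"
    case CalendarIntervalType => StringType.catalogString // "string"
    case other => other.catalogString
  }
}
// attrTypeStrings == Seq("string"), so Type.getType succeeds, while DESC
// still reports the real data_type, "interval".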

Contributor

Ok, I understand now.
I was wondering how it works: if you create a TableSchema here with a complex type like Array, and that TableSchema is then used through getResultSetSchema to create the result RowSet, how does the RowSet know that we are in fact going to return a String and not an Array?
It turns out this goes all the way down to Column in the constructed ColumnBasedSet (https://github.com/apache/spark/blob/master/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/Column.java#L123), which assumes String for any non-primitive type (or, for RowBasedSet in older protocol versions, to the conversions in ColumnValue: https://github.com/apache/spark/blob/master/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ColumnValue.java#L198).

Now I understand. Thank you 👍
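Putting the thread together, a hedged end-to-end sketch from the client side (assumes a Thrift server on localhost:10000 and the Hive JDBC driver on the classpath; the expected values follow from the mapping above and the tests below, not from verified output):

import java.sql.DriverManager

object IntervalJdbcSketch extends App {
  val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000")
  try {
    val rs = conn.createStatement()
      .executeQuery("SELECT interval '1' year '2' day AS i")
    // The advertised column type is string, not interval...
    println(rs.getMetaData.getColumnTypeName(1)) // expected: string
    // ...and the value arrives as the Hive-style interval string.
    while (rs.next()) {
      println(rs.getString(1)) // expected: interval 1 years 2 days
    }
  } finally {
    conn.close()
  }
}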

}
new FieldSchema(field.name, attrTypeString, field.getComment.getOrElse(""))
}
new TableSchema(schema.asJava)
@@ -662,6 +662,21 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest
assert(rs.getBigDecimal(1) === new java.math.BigDecimal("1.000000000000000000"))
}
}

test("Support interval type") {
withJdbcStatement() { statement =>
val rs = statement.executeQuery("SELECT interval 3 months 1 hours")
assert(rs.next())
assert(rs.getString(1) === "interval 3 months 1 hours")
}
// Invalid interval value
withJdbcStatement() { statement =>
val e = intercept[SQLException] {
statement.executeQuery("SELECT interval 3 months 1 hou")
}
assert(e.getMessage.contains("org.apache.spark.sql.catalyst.parser.ParseException"))
}
}
}

class SingleSessionSuite extends HiveThriftJdbcTest {
@@ -261,10 +261,10 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest {
}
}

- // We do not fully support interval type
- ignore(s"$version get interval type") {
+ test(s"$version get interval type") {
testExecuteStatementWithProtocolVersion(version, "SELECT interval '1' year '2' day") { rs =>
assert(rs.next())
+ assert(rs.getString(1) === "interval 1 years 2 days")
}
}
