Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,19 @@ object HiveTypeCoercion {
})
}

/**
 * Similar to [[findTightestCommonType]], but when the tightest common type cannot be
 * found, falls back to [[findTightestCommonTypeToString]] so that types are promoted
 * to StringType as a last resort.
 */
private def findTightestCommonTypeAndPromoteToString(types: Seq[DataType]): Option[DataType] = {
  // Fold over all types, pairwise widening; a single incompatible pair yields None.
  types.foldLeft(Option[DataType](NullType)) { (resolved, next) =>
    resolved.flatMap { current =>
      findTightestCommonTypeOfTwo(current, next)
        .orElse(findTightestCommonTypeToString(current, next))
    }
  }
}


/**
* Find the tightest common type of a set of types by continuously applying
* `findTightestCommonTypeOfTwo` on these types.
Expand Down Expand Up @@ -599,7 +612,7 @@ trait HiveTypeCoercion {
// compatible with every child column.
case Coalesce(es) if es.map(_.dataType).distinct.size > 1 =>
val types = es.map(_.dataType)
findTightestCommonType(types) match {
findTightestCommonTypeAndPromoteToString(types) match {
case Some(finalDataType) => Coalesce(es.map(Cast(_, finalDataType)))
case None =>
sys.error(s"Could not determine return type of Coalesce for ${types.mkString(",")}")
Expand Down Expand Up @@ -634,7 +647,7 @@ trait HiveTypeCoercion {
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
case c: CaseWhenLike if c.childrenResolved && !c.valueTypesEqual =>
logDebug(s"Input values for null casting ${c.valueTypes.mkString(",")}")
val maybeCommonType = findTightestCommonType(c.valueTypes)
val maybeCommonType = findTightestCommonTypeAndPromoteToString(c.valueTypes)
maybeCommonType.map { commonType =>
val castedBranches = c.branches.grouped(2).map {
case Seq(when, value) if value.dataType != commonType =>
Expand All @@ -650,7 +663,8 @@ trait HiveTypeCoercion {
}.getOrElse(c)

case c: CaseKeyWhen if c.childrenResolved && !c.resolved =>
val maybeCommonType = findTightestCommonType((c.key +: c.whenList).map(_.dataType))
val maybeCommonType =
findTightestCommonTypeAndPromoteToString((c.key +: c.whenList).map(_.dataType))
maybeCommonType.map { commonType =>
val castedBranches = c.branches.grouped(2).map {
case Seq(when, then) if when.dataType != commonType =>
Expand Down
10 changes: 10 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll with SQLTestUtils {
Row("one", 6) :: Row("three", 3) :: Nil)
}

test("SPARK-8010: promote numeric to string") {
val df = Seq((1, 1)).toDF("key", "value")
df.registerTempTable("src")
val queryCaseWhen = sql("select case when true then 1.0 else '1' end from src ")
val queryCoalesce = sql("select coalesce(null, 1, '1') from src ")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like these two test queries are not really good examples. We can determine the data type at compile time, since `coalesce` and `case when ...` here basically represent a constant value. Actually, for these cases, why do we use StringType as the data type?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yhuai, actually we often write queries using a UDF in the `then` value and `else` value, like below:
select case when boolean then split(city_code, ',')[0] else -99 end from tablename
Hive will implicitly convert the case-when expression value to a string type, since the split function returns a string type but the else value is an integer.
Spark SQL currently throws an exception because the types of the `then` value and `else` value are not convertible.
The reason we use StringType is that, when doing implicit conversion in AtomicType, almost every type that meets StringType is converted to StringType, except BinaryType and BooleanType.

You can refer the chart at the bottom of the page:
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My question was about your test case. `case when true then 1.0 else '1' end` is basically a constant. Why do you think using a string type is good here? Also, if this expression is optimized by our constant folding rule, you are not testing the execution side. A case like `case when boolCol then 1.0 else '1'` may be a better one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems Hive will use StringType. I am fine with that.


checkAnswer(queryCaseWhen, Row("1.0") :: Nil)
checkAnswer(queryCoalesce, Row("1") :: Nil)
}

test("SPARK-6743: no columns from cache") {
Seq(
(83, 0, 38),
Expand Down