Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ object DecimalPrecision extends Rule[LogicalPlan] {
// Convenience overload: unpacks precision and scale from both decimal types and
// delegates to the (p1, s1, p2, s2) overload of widerDecimalType.
def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType = {
  val (p1, s1) = (d1.precision, d1.scale)
  val (p2, s2) = (d2.precision, d2.scale)
  widerDecimalType(p1, s1, p2, s2)
}

// max(s1, s2) + max(p1-s1, p2-s2), max(s1, s2)
def widerDecimalType(p1: Int, s1: Int, p2: Int, s2: Int): DecimalType = {
val scale = max(s1, s2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ object TypeCoercion {
InConversion ::
WidenSetOperationTypes ::
PromoteStrings ::
DecimalPrecision ::
DecimalPrecision :: // DecimalPrecision must happen before ImplicitTypeCasts.
BooleanEquality ::
StringToIntegralCasts ::
FunctionArgumentConversion ::
Expand Down Expand Up @@ -99,45 +99,13 @@ object TypeCoercion {
case _ => None
}

/**
 * Tries [[findTightestCommonTypeOfTwo]] first; if that yields nothing, falls back to
 * StringType when one side is StringType and the other is an atomic type other than
 * BinaryType or BooleanType. Returns None when neither rule applies.
 */
def findTightestCommonTypeToString(left: DataType, right: DataType): Option[DataType] = {
  // An atomic type may be promoted to a string unless it is binary or boolean.
  def promotableToString(dt: DataType): Boolean = dt match {
    case atomic: AtomicType => atomic != BinaryType && atomic != BooleanType
    case _ => false
  }

  findTightestCommonTypeOfTwo(left, right).orElse {
    (left, right) match {
      case (StringType, other) if promotableToString(other) => Some(StringType)
      case (other, StringType) if promotableToString(other) => Some(StringType)
      case _ => None
    }
  }
}

/**
 * Folds [[findTightestCommonTypeToString]] over `types`, starting from NullType.
 * The result is None if any step fails to find a common type; an empty `types`
 * yields Some(NullType).
 */
private def findTightestCommonTypeAndPromoteToString(types: Seq[DataType]): Option[DataType] = {
  val seed: Option[DataType] = Some(NullType)
  types.foldLeft(seed) { (acc, next) =>
    // Once the accumulator is None it stays None for the remaining elements.
    acc.flatMap(current => findTightestCommonTypeToString(current, next))
  }
}

/**
 * Computes the tightest common type of a sequence of types by folding
 * `findTightestCommonTypeOfTwo` over it, starting from NullType. The result is
 * None if any step fails to find a common type; an empty `types` yields
 * Some(NullType).
 */
private def findTightestCommonType(types: Seq[DataType]): Option[DataType] = {
  val seed: Option[DataType] = Some(NullType)
  types.foldLeft(seed) { (acc, next) =>
    // Once the accumulator is None it stays None for the remaining elements.
    acc.flatMap(current => findTightestCommonTypeOfTwo(current, next))
  }
}

/**
* Case 2 type widening (see the classdoc comment above for TypeCoercion).
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I inlined this into findWiderTypeForTwo

*
* i.e. the main difference with [[findTightestCommonTypeOfTwo]] is that here we allow some
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also mention the string promotion here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

* loss of precision when widening decimal and double.
* loss of precision when widening decimal and double, and also widen all the way to string type.
*/
private def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match {
def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match {
case (t1: DecimalType, t2: DecimalType) =>
Some(DecimalPrecision.widerDecimalType(t1, t2))
case (t: IntegralType, d: DecimalType) =>
Expand All @@ -147,7 +115,13 @@ object TypeCoercion {
case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) =>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is no longer used

Some(DoubleType)
case _ =>
findTightestCommonTypeToString(t1, t2)
findTightestCommonTypeOfTwo(t1, t2).orElse((t1, t2) match {
case (StringType, t2: AtomicType) if t2 != BinaryType && t2 != BooleanType =>
Some(StringType)
case (t1: AtomicType, StringType) if t1 != BinaryType && t1 != BooleanType =>
Some(StringType)
case _ => None
})
}

private def findWiderCommonType(types: Seq[DataType]) = {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is no longer used.

Expand Down Expand Up @@ -440,7 +414,7 @@ object TypeCoercion {

case a @ CreateArray(children) if !haveSameType(children) =>
val types = children.map(_.dataType)
findTightestCommonTypeAndPromoteToString(types) match {
findWiderCommonType(types) match {
Copy link
Contributor

@cloud-fan cloud-fan Jul 28, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does Hive allow loss of precision in this case?

Copy link
Member

@HyukjinKwon HyukjinKwon Jul 28, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In current master, yes it seems so,

I fixed the example. It seems the precision is being truncated.

hive> SELECT array(10000000000000000000.5BD, 1.00000000000000005123BD);
OK
[10000000000000000000.5,1.000000000000000051]
Time taken: 0.06 seconds, Fetched: 1 row(s)

and it seems

hive> SELECT array(10000000000000000000, 1.0000000000000005123BD);
OK
[1.0E19,1.0000000000000004]
Time taken: 0.061 seconds, Fetched: 1 row(s)

the result becomes a double when the types are different. I will look deeper into the code and update you if you want.

case Some(finalDataType) => CreateArray(children.map(Cast(_, finalDataType)))
case None => a
}
Expand All @@ -451,7 +425,7 @@ object TypeCoercion {
m.keys
} else {
val types = m.keys.map(_.dataType)
findTightestCommonTypeAndPromoteToString(types) match {
findWiderCommonType(types) match {
case Some(finalDataType) => m.keys.map(Cast(_, finalDataType))
case None => m.keys
}
Expand All @@ -461,7 +435,7 @@ object TypeCoercion {
m.values
} else {
val types = m.values.map(_.dataType)
findTightestCommonTypeAndPromoteToString(types) match {
findWiderCommonType(types) match {
case Some(finalDataType) => m.values.map(Cast(_, finalDataType))
case None => m.values
}
Expand Down Expand Up @@ -496,14 +470,14 @@ object TypeCoercion {

case g @ Greatest(children) if !haveSameType(children) =>
val types = children.map(_.dataType)
findTightestCommonType(types) match {
findWiderCommonType(types) match {
case Some(finalDataType) => Greatest(children.map(Cast(_, finalDataType)))
case None => g
}

case l @ Least(children) if !haveSameType(children) =>
val types = children.map(_.dataType)
findTightestCommonType(types) match {
findWiderCommonType(types) match {
case Some(finalDataType) => Least(children.map(Cast(_, finalDataType)))
case None => l
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ case class IfNull(left: Expression, right: Expression) extends RuntimeReplaceabl

override def replaceForTypeCoercion(): Expression = {
if (left.dataType != right.dataType) {
TypeCoercion.findTightestCommonTypeOfTwo(left.dataType, right.dataType).map { dtype =>
TypeCoercion.findWiderTypeForTwo(left.dataType, right.dataType).map { dtype =>
copy(left = Cast(left, dtype), right = Cast(right, dtype))
}.getOrElse(this)
} else {
Expand All @@ -116,7 +116,7 @@ case class NullIf(left: Expression, right: Expression) extends RuntimeReplaceabl

override def replaceForTypeCoercion(): Expression = {
if (left.dataType != right.dataType) {
TypeCoercion.findTightestCommonTypeOfTwo(left.dataType, right.dataType).map { dtype =>
TypeCoercion.findWiderTypeForTwo(left.dataType, right.dataType).map { dtype =>
copy(left = Cast(left, dtype), right = Cast(right, dtype))
}.getOrElse(this)
} else {
Expand All @@ -134,7 +134,7 @@ case class Nvl(left: Expression, right: Expression) extends RuntimeReplaceable {

override def replaceForTypeCoercion(): Expression = {
if (left.dataType != right.dataType) {
TypeCoercion.findTightestCommonTypeToString(left.dataType, right.dataType).map { dtype =>
TypeCoercion.findWiderTypeForTwo(left.dataType, right.dataType).map { dtype =>
copy(left = Cast(left, dtype), right = Cast(right, dtype))
}.getOrElse(this)
} else {
Expand All @@ -154,7 +154,7 @@ case class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression)

override def replaceForTypeCoercion(): Expression = {
if (expr2.dataType != expr3.dataType) {
TypeCoercion.findTightestCommonTypeOfTwo(expr2.dataType, expr3.dataType).map { dtype =>
TypeCoercion.findWiderTypeForTwo(expr2.dataType, expr3.dataType).map { dtype =>
copy(expr2 = Cast(expr2, dtype), expr3 = Cast(expr3, dtype))
}.getOrElse(this)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,6 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite {
// Runs the same set of invalid-argument checks against both Greatest and Least.
// Each assertError call pairs an argument list with a fragment of the expected
// error message (assertError is defined outside this view — presumably it checks
// that analysis fails with a message containing the fragment; confirm there).
test("check types for Greatest/Least") {
// Both expressions are treated as constructors taking a Seq[Expression].
for (operator <- Seq[(Seq[Expression] => Expression)](Greatest, Least)) {
// A single argument is too few.
assertError(operator(Seq('booleanField)), "requires at least 2 arguments")
// Mixed argument types are expected to be rejected.
assertError(operator(Seq('intField, 'stringField)), "should all have the same type")
assertError(operator(Seq('intField, 'decimalField)), "should all have the same type")
// Map-typed arguments are expected to be rejected as unorderable.
assertError(operator(Seq('mapField, 'mapField)), "does not support ordering")
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql

import java.math.BigDecimal

import org.apache.spark.sql.test.SharedSQLContext

/**
 * End-to-end tests for type coercion.
 *
 * Each check runs a SQL query through `sql(...)` and compares the result with an
 * expected [[Row]] via `checkAnswer` (both come from the mixed-in test traits,
 * which are defined outside this file).
 */
class SQLTypeCoercionSuite extends QueryTest with SharedSQLContext {

// Mixes decimal literals of different scales (0.001 and 0.1) inside map, array,
// greatest/least and the null-handling functions, and compares the results with
// BigDecimal values that all carry three fractional digits.
test("SPARK-16714 decimal widening") {
// v1 = 1 / 1000, i.e. 0.001.
val v1 = new BigDecimal(1).divide(new BigDecimal(1000))
// v2 = 1 / 10, rescaled to three fractional digits, i.e. 0.100.
val v2 = new BigDecimal(1).divide(new BigDecimal(10)).setScale(3)

// Map keys and map values.
checkAnswer(
sql("select map(0.001, 0.001, 0.1, 0.1)"),
Row(Map(v1 -> v1, v2 -> v2))
)

// Array elements.
checkAnswer(
sql("select array(0.001, 0.1)"),
Row(Seq(v1, v2))
)

// greatest picks 0.1 (v2) and least picks 0.001 (v1).
checkAnswer(
sql("select greatest(0.001, 0.1), least(0.001, 0.1)"),
Row(v2, v1)
)

// ifnull, nullif, nvl2, nvl and if: every column is expected to be 0.001 (v1).
checkAnswer(
sql(
"""
|select ifnull(0.001, 0.1), nullif(0.001, 0.1), nvl2(0.001, 0.001, 0.1), nvl(0.001, 0.1),
| if(true, 0.001, 0.1)
""".stripMargin),
Row(v1, v1, v1, v1, v1)
)
}

}