Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fixed if.
  • Loading branch information
rxin committed Jun 11, 2015
commit 3c32bbc8c1398fadebc999adc105b7958ccd4960
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ object FunctionRegistry {
expression[CreateArray]("array"),
expression[Coalesce]("coalesce"),
expression[Explode]("explode"),
// expression[If]("if"), TODO: turn this on after adding rules to auto cast types.
expression[If]("if"),
expression[IsNull]("isnull"),
expression[IsNotNull]("isnotnull"),
expression[Coalesce]("nvl"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ trait HiveTypeCoercion {
StringToIntegralCasts ::
FunctionArgumentConversion ::
CaseWhenCoercion ::
IfCoercion ::
Division ::
PropagateTypes ::
ExpectedInputConversion ::
Expand Down Expand Up @@ -652,6 +653,27 @@ trait HiveTypeCoercion {
}
}

/**
* Coerces the type of different branches of If statement to a common type.
*/
object IfCoercion extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
// Find tightest common type for If, if the true value and false value have different types.
case i @ If(pred, left, right) if left.dataType != right.dataType =>
findTightestCommonTypeOfTwo(left.dataType, right.dataType).map { widestType =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hive supports promoting types to String for If, but our findTightestCommonTypeOfTwo doesn't support it. There are some other expressions, like Coalesce and CaseWhen, that also need to promote types to String. #6551 tries to fix it but it's not complete. I'm wondering whether Hive has a specific rule for string type promotion that we can follow, or whether it is all down to Hive's implicit conversions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can define a new TypeConversion class for Coalesce, If, CaseWhen, and so on, because in Hive these UDFs all use the same type conversion rule, as in https://github.com/apache/hive/blob/ac755ebe26361a4647d53db2a28500f71697b276/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java#L79.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup that's a good idea.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a rule to promote to string just for If for this patch. We can generalize it later.

val newLeft = if (left.dataType == widestType) left else Cast(left, widestType)
val newRight = if (right.dataType == widestType) right else Cast(right, widestType)
i.makeCopy(Array(pred, newLeft, newRight))
}.getOrElse(i) // If there is no applicable conversion, leave expression unchanged.

// Convert If(null literal, _, _) into boolean type: a predicate whose data type is NullType
// is replaced by a null literal typed as BooleanType, leaving both branches untouched.
// In the optimizer, we should short-circuit this directly into false value.
// The node is rebuilt through the If constructor rather than makeCopy so the arguments are
// checked at compile time, and no debug output is written when the rule fires.
case If(pred, left, right) if pred.dataType == NullType =>
  If(Literal.create(null, BooleanType), left, right)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this special case? Also, I'd just use `If` instead of `makeCopy` here and above. `makeCopy` is nice when you are matching on different but structurally similar expressions, but it loses compile-time checks for arguments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"if(null, true, false)" gets a nulltype.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about

case i @ If (pred, _, _) if pred.dataType == NullType =>
  i.copy(predicate = Literal.create(null, BooleanType))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

}
}

/**
* Casts types according to the expected input types for Expressions that have the trait
* `ExpectsInputTypes`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,19 @@ class HiveTypeCoercionSuite extends PlanTest {
:: Nil))
}

test("type coercion for If") {
  // Rule under test, taken from an anonymous HiveTypeCoercion instance.
  val ifCoercion = new HiveTypeCoercion { }.IfCoercion

  // Int and Long branches: the Int branch is expected to be wrapped in a Cast to LongType.
  val mixedBranches = If(Literal(true), Literal(1), Literal(1L))
  val widenedBranches = If(Literal(true), Cast(Literal(1), LongType), Literal(1L))
  ruleTest(ifCoercion, mixedBranches, widenedBranches)

  // NullType predicate: expected to be rewritten to a null literal of BooleanType.
  val untypedNullPredicate = If(Literal.create(null, NullType), Literal(1), Literal(1))
  val booleanNullPredicate = If(Literal.create(null, BooleanType), Literal(1), Literal(1))
  ruleTest(ifCoercion, untypedNullPredicate, booleanNullPredicate)
}

test("type coercion for CaseKeyWhen") {
val cwc = new HiveTypeCoercion {}.CaseWhenCoercion
ruleTest(cwc,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,52 @@ package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.types.{IntegerType, BooleanType}
import org.apache.spark.sql.types._


class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {

test("if") {
  // Each row is (predicate, trueValue, falseValue, expected). A null entry is passed through
  // unchanged and ends up as a null literal value in the expression under test.
  val scenarios = Seq[(java.lang.Boolean, Integer, Integer, Integer)](
    (true, 1, 2, 1),
    (false, 1, 2, 2),
    (null, 1, 2, 2),
    (true, null, 2, null),
    (false, 1, null, null),
    (null, null, 2, 2),
    (null, 1, null, null)
  )

  // Runs every scenario with the integer fixtures mapped through `convert`. `dataType` is the
  // type given to both branch literals, so `convert` has to produce values that fit it.
  def testIf(convert: (Integer => Any), dataType: DataType): Unit = {
    // Only non-null fixtures are converted; null stays null.
    def convertOrNull(value: Integer): Any = if (value == null) null else convert(value)

    scenarios.foreach { case (predicate, trueValue, falseValue, expected) =>
      val condition = Literal.create(predicate, BooleanType)
      val whenTrue = Literal.create(convertOrNull(trueValue), dataType)
      val whenFalse = Literal.create(convertOrNull(falseValue), dataType)
      checkEvaluation(If(condition, whenTrue, whenFalse), convertOrNull(expected))
    }
  }

  // Boolean and integral branch types.
  testIf(_ == 1, BooleanType)
  testIf(_.toShort, ShortType)
  testIf(identity, IntegerType)
  testIf(_.toLong, LongType)

  // Fractional and decimal branch types.
  testIf(_.toFloat, FloatType)
  testIf(_.toDouble, DoubleType)
  testIf(Decimal(_), DecimalType.Unlimited)

  // Date and timestamp branch types, fed with the raw integer / long fixture values.
  testIf(identity, DateType)
  testIf(_.toLong, TimestampType)

  // String branch type.
  testIf(_.toString, StringType)
}

test("case when") {
val row = create_row(null, false, true, "a", "b", "c")
val c1 = 'a.boolean.at(0)
Expand Down