-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-16714][SQL] Refactor type widening for consistency #14389
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9774605
57bbe81
afca003
929c39f
071b01d
b9f94fe
ffd1734
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,7 +49,7 @@ object TypeCoercion { | |
| InConversion :: | ||
| WidenSetOperationTypes :: | ||
| PromoteStrings :: | ||
| DecimalPrecision :: | ||
| DecimalPrecision :: // DecimalPrecision must happen before ImplicitTypeCasts. | ||
| BooleanEquality :: | ||
| StringToIntegralCasts :: | ||
| FunctionArgumentConversion :: | ||
|
|
@@ -99,45 +99,13 @@ object TypeCoercion { | |
| case _ => None | ||
| } | ||
|
|
||
| /** Similar to [[findTightestCommonType]], but can promote all the way to StringType. */ | ||
| def findTightestCommonTypeToString(left: DataType, right: DataType): Option[DataType] = { | ||
| findTightestCommonTypeOfTwo(left, right).orElse((left, right) match { | ||
| case (StringType, t2: AtomicType) if t2 != BinaryType && t2 != BooleanType => Some(StringType) | ||
| case (t1: AtomicType, StringType) if t1 != BinaryType && t1 != BooleanType => Some(StringType) | ||
| case _ => None | ||
| }) | ||
| } | ||
|
|
||
| /** | ||
| * Similar to [[findTightestCommonType]], if can not find the TightestCommonType, try to use | ||
| * [[findTightestCommonTypeToString]] to find the TightestCommonType. | ||
| */ | ||
| private def findTightestCommonTypeAndPromoteToString(types: Seq[DataType]): Option[DataType] = { | ||
| types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match { | ||
| case None => None | ||
| case Some(d) => | ||
| findTightestCommonTypeToString(d, c) | ||
| }) | ||
| } | ||
|
|
||
| /** | ||
| * Find the tightest common type of a set of types by continuously applying | ||
| * `findTightestCommonTypeOfTwo` on these types. | ||
| */ | ||
| private def findTightestCommonType(types: Seq[DataType]): Option[DataType] = { | ||
| types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match { | ||
| case None => None | ||
| case Some(d) => findTightestCommonTypeOfTwo(d, c) | ||
| }) | ||
| } | ||
|
|
||
| /** | ||
| * Case 2 type widening (see the classdoc comment above for TypeCoercion). | ||
| * | ||
| * i.e. the main difference with [[findTightestCommonTypeOfTwo]] is that here we allow some | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also mention the string promotion here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| * loss of precision when widening decimal and double. | ||
| * loss of precision when widening decimal and double, and also widen all the way to string type. | ||
| */ | ||
| private def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { | ||
| def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { | ||
| case (t1: DecimalType, t2: DecimalType) => | ||
| Some(DecimalPrecision.widerDecimalType(t1, t2)) | ||
| case (t: IntegralType, d: DecimalType) => | ||
|
|
@@ -147,7 +115,13 @@ object TypeCoercion { | |
| case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) => | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is no longer used |
||
| Some(DoubleType) | ||
| case _ => | ||
| findTightestCommonTypeToString(t1, t2) | ||
| findTightestCommonTypeOfTwo(t1, t2).orElse((t1, t2) match { | ||
| case (StringType, t2: AtomicType) if t2 != BinaryType && t2 != BooleanType => | ||
| Some(StringType) | ||
| case (t1: AtomicType, StringType) if t1 != BinaryType && t1 != BooleanType => | ||
| Some(StringType) | ||
| case _ => None | ||
| }) | ||
| } | ||
|
|
||
| private def findWiderCommonType(types: Seq[DataType]) = { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is no longer used. |
||
|
|
@@ -440,7 +414,7 @@ object TypeCoercion { | |
|
|
||
| case a @ CreateArray(children) if !haveSameType(children) => | ||
| val types = children.map(_.dataType) | ||
| findTightestCommonTypeAndPromoteToString(types) match { | ||
| findWiderCommonType(types) match { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does hive allow precision lose for this case?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In current master, yes it seems so, I fixed the example. It seems the precision is being truncated. and it seems it becomes to double when the types are different. I will look into the codes deeper and update you if you want. |
||
| case Some(finalDataType) => CreateArray(children.map(Cast(_, finalDataType))) | ||
| case None => a | ||
| } | ||
|
|
@@ -451,7 +425,7 @@ object TypeCoercion { | |
| m.keys | ||
| } else { | ||
| val types = m.keys.map(_.dataType) | ||
| findTightestCommonTypeAndPromoteToString(types) match { | ||
| findWiderCommonType(types) match { | ||
| case Some(finalDataType) => m.keys.map(Cast(_, finalDataType)) | ||
| case None => m.keys | ||
| } | ||
|
|
@@ -461,7 +435,7 @@ object TypeCoercion { | |
| m.values | ||
| } else { | ||
| val types = m.values.map(_.dataType) | ||
| findTightestCommonTypeAndPromoteToString(types) match { | ||
| findWiderCommonType(types) match { | ||
| case Some(finalDataType) => m.values.map(Cast(_, finalDataType)) | ||
| case None => m.values | ||
| } | ||
|
|
@@ -496,14 +470,14 @@ object TypeCoercion { | |
|
|
||
| case g @ Greatest(children) if !haveSameType(children) => | ||
| val types = children.map(_.dataType) | ||
| findTightestCommonType(types) match { | ||
| findWiderCommonType(types) match { | ||
| case Some(finalDataType) => Greatest(children.map(Cast(_, finalDataType))) | ||
| case None => g | ||
| } | ||
|
|
||
| case l @ Least(children) if !haveSameType(children) => | ||
| val types = children.map(_.dataType) | ||
| findTightestCommonType(types) match { | ||
| findWiderCommonType(types) match { | ||
| case Some(finalDataType) => Least(children.map(Cast(_, finalDataType))) | ||
| case None => l | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql | ||
|
|
||
| import java.math.BigDecimal | ||
|
|
||
| import org.apache.spark.sql.test.SharedSQLContext | ||
|
|
||
| /** | ||
| * End-to-end tests for type coercion. | ||
| */ | ||
| class SQLTypeCoercionSuite extends QueryTest with SharedSQLContext { | ||
|
|
||
| test("SPARK-16714 decimal widening") { | ||
| val v1 = new BigDecimal(1).divide(new BigDecimal(1000)) | ||
| val v2 = new BigDecimal(1).divide(new BigDecimal(10)).setScale(3) | ||
|
|
||
| checkAnswer( | ||
| sql("select map(0.001, 0.001, 0.1, 0.1)"), | ||
| Row(Map(v1 -> v1, v2 -> v2)) | ||
| ) | ||
|
|
||
| checkAnswer( | ||
| sql("select array(0.001, 0.1)"), | ||
| Row(Seq(v1, v2)) | ||
| ) | ||
|
|
||
| checkAnswer( | ||
| sql("select greatest(0.001, 0.1), least(0.001, 0.1)"), | ||
| Row(v2, v1) | ||
| ) | ||
|
|
||
| checkAnswer( | ||
| sql( | ||
| """ | ||
| |select ifnull(0.001, 0.1), nullif(0.001, 0.1), nvl2(0.001, 0.001, 0.1), nvl(0.001, 0.1), | ||
| | if(true, 0.001, 0.1) | ||
| """.stripMargin), | ||
| Row(v1, v1, v1, v1, v1) | ||
| ) | ||
| } | ||
|
|
||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I inlined this into findWiderTypeForTwo