-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-47210][SQL] Addition of implicit casting without indeterminate support #45383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
b34544a
fdbfa44
ce9b027
e537190
b5a79c1
8321d0c
d178233
a4b9be7
a6e7662
e1d7ad5
b3b1356
7773d13
198a728
255b1ab
9ce417f
50f3aa2
ca0c84d
880a1b1
56d6c7c
94e5259
ccb52ba
9b1387b
c9974e1
fca9a65
660d664
c8edd93
a91490b
49a8d61
4c4cd84
66122a6
50f46e4
cc86a87
c01e80c
cc797a2
5d001ee
c68fc7d
1c926ab
dec39bf
e808446
ca1a23a
116931c
af487a2
4ba7055
e490e42
00e88e7
85b4d16
30f7225
e89a354
788dc06
75c0140
2918413
f6ed55a
98960c0
de623c8
a92b4e1
f7f3011
f67808e
815ce42
7fca38a
b19b0eb
a111f03
55bdd9b
a7228be
27a72c6
18ada04
38670af
01d891e
c5daf86
9ac5678
0f1757d
506c8c0
f743cf8
4f8fe1d
52bf4dc
7cbeafe
3e92e92
b23e106
880ebed
f96ecd9
e1e0cf4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,25 +22,12 @@ import javax.annotation.Nullable | |
| import scala.annotation.tailrec | ||
|
|
||
| import org.apache.spark.sql.catalyst.analysis.TypeCoercion.hasStringType | ||
| import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Cast, Collate, ComplexTypeMergingExpression, CreateArray, Elt, ExpectsInputTypes, Expression, Predicate, SortOrder} | ||
| import org.apache.spark.sql.catalyst.expressions.{ArrayJoin, BinaryExpression, Cast, Collate, ComplexTypeMergingExpression, ConcatWs, CreateArray, Expression, In, InSubquery, Substring} | ||
| import org.apache.spark.sql.errors.QueryCompilationErrors | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.{AbstractDataType, ArrayType, DataType, StringType} | ||
|
|
||
| object CollationTypeCasts extends TypeCoercionRule { | ||
| override val transform: PartialFunction[Expression, Expression] = { | ||
| case e if !e.childrenResolved => e | ||
| case sc @ (_: BinaryExpression | ||
| | _: ComplexTypeMergingExpression | ||
| | _: CreateArray | ||
| | _: Elt | ||
| | _: ExpectsInputTypes | ||
| | _: Predicate | ||
| | _: SortOrder) => | ||
| val newChildren = collateToSingleType(sc.children) | ||
| sc.withNewChildren(newChildren) | ||
| } | ||
|
|
||
| abstract class CollationTypeCasts extends TypeCoercionRule { | ||
| /** | ||
| * Extracts StringTypes from filtered hasStringType | ||
| */ | ||
|
|
@@ -104,6 +91,7 @@ object CollationTypeCasts extends TypeCoercionRule { | |
| val implicitTypes = expr.map(_.dataType) | ||
| .filter(hasStringType) | ||
| .map(extractStringType) | ||
| .filter(dt => dt.collationId != SQLConf.get.defaultStringType.collationId) | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (hasMultipleImplicits(implicitTypes)) { | ||
mihailomilosevic2001 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| throw QueryCompilationErrors.implicitCollationMismatchError() | ||
|
|
@@ -127,3 +115,38 @@ object CollationTypeCasts extends TypeCoercionRule { | |
| .filter(dt => !(dt == SQLConf.get.defaultStringType.collationId)).distinct.size > 1 | ||
|
|
||
| } | ||
|
|
||
| /** | ||
| * This rule is used to collate all existing expressions related to StringType into a single | ||
| * collation. Arrays are handled using their elementType and should be cast for these expressions. | ||
| */ | ||
| object PreCollationTypeCasts extends CollationTypeCasts { | ||
| override val transform: PartialFunction[Expression, Expression] = { | ||
| case e if !e.childrenResolved => e | ||
| case sc@(_: In | ||
| | _: InSubquery | ||
| | _: CreateArray | ||
| | _: ComplexTypeMergingExpression | ||
|
||
| | _: ArrayJoin | ||
| | _: BinaryExpression | ||
| | _: ConcatWs | ||
| | _: Substring) => | ||
| val newChildren = collateToSingleType(sc.children) | ||
| sc.withNewChildren(newChildren) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * This rule is used for managing expressions that have possible implicit casts from different | ||
| * types in ImplicitTypeCasts rule. | ||
| */ | ||
| object PostCollationTypeCasts extends CollationTypeCasts { | ||
|
||
| override val transform: PartialFunction[Expression, Expression] = { | ||
| case e if !e.childrenResolved => e | ||
| case sc@(_: ArrayJoin | ||
| | _: BinaryExpression | ||
| | _: Substring) => | ||
| val newChildren = collateToSingleType(sc.children) | ||
| sc.withNewChildren(newChildren) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,8 +23,7 @@ import scala.annotation.tailrec | |
| import scala.collection.mutable | ||
|
|
||
| import org.apache.spark.internal.Logging | ||
| import org.apache.spark.sql.catalyst.analysis.CollationTypeCasts.{castStringType, getOutputCollation} | ||
| import org.apache.spark.sql.catalyst.analysis.TypeCoercion.hasStringType | ||
| import org.apache.spark.sql.catalyst.analysis.PreCollationTypeCasts.getOutputCollation | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.expressions.aggregate._ | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
|
|
@@ -660,9 +659,8 @@ abstract class TypeCoercionBase { | |
| val newIndex = implicitCast(index, IntegerType).getOrElse(index) | ||
| val newInputs = if (conf.eltOutputAsString || | ||
| !children.tail.map(_.dataType).forall(_ == BinaryType)) { | ||
| val st = getOutputCollation(children) | ||
| children.tail.map { e => | ||
| implicitCast(e, st).getOrElse(e) | ||
| implicitCast(e, SQLConf.get.defaultStringType).getOrElse(e) | ||
| } | ||
| } else { | ||
| children.tail | ||
|
|
@@ -707,39 +705,27 @@ abstract class TypeCoercionBase { | |
| }.getOrElse(b) // If there is no applicable conversion, leave expression unchanged. | ||
|
|
||
| case e: ImplicitCastInputTypes if e.inputTypes.nonEmpty => | ||
| val childrenBeforeCollations: Seq[Expression] = e.children.zip(e.inputTypes).map { | ||
| val children: Seq[Expression] = e.children.zip(e.inputTypes).map { | ||
| // If we cannot do the implicit cast, just use the original input. | ||
| case (in, expected) => implicitCast(in, expected).getOrElse(in) | ||
|
||
| } | ||
| val st = getOutputCollation(e.children) | ||
| val children: Seq[Expression] = childrenBeforeCollations.map { | ||
| case in if hasStringType(in.dataType) => | ||
| castStringType(in, st).getOrElse(in) | ||
| case in => in | ||
| } | ||
| e.withNewChildren(children) | ||
|
|
||
| case e: ExpectsInputTypes if e.inputTypes.nonEmpty => | ||
| // Convert NullType into some specific target type for ExpectsInputTypes that don't do | ||
| // general implicit casting. | ||
| val childrenBeforeCollations: Seq[Expression] = | ||
| val children: Seq[Expression] = | ||
| e.children.zip(e.inputTypes).map { case (in, expected) => | ||
|
||
| if (in.dataType == NullType && !expected.acceptsType(NullType)) { | ||
| Literal.create(null, expected.defaultConcreteType) | ||
| } else { | ||
| in | ||
| } | ||
| } | ||
| val st = getOutputCollation(e.children) | ||
| val children: Seq[Expression] = childrenBeforeCollations.map { | ||
| case in if hasStringType(in.dataType) => | ||
| castStringType(in, st).getOrElse(in) | ||
| case in => in | ||
| } | ||
| e.withNewChildren(children) | ||
|
|
||
| case udf: ScalaUDF if udf.inputTypes.nonEmpty => | ||
| val childrenBeforeCollations = udf.children.zip(udf.inputTypes).map { case (in, expected) => | ||
| val children = udf.children.zip(udf.inputTypes).map { case (in, expected) => | ||
| // Currently Scala UDF will only expect `AnyDataType` at top level, so this trick works. | ||
| // In the future we should create types like `AbstractArrayType`, so that Scala UDF can | ||
| // accept inputs of array type of arbitrary element type. | ||
|
|
@@ -752,12 +738,6 @@ abstract class TypeCoercionBase { | |
| ).getOrElse(in) | ||
| } | ||
| } | ||
| val st = getOutputCollation(udf.children) | ||
| val children: Seq[Expression] = childrenBeforeCollations.map { | ||
| case in if hasStringType(in.dataType) => | ||
| castStringType(in, st).getOrElse(in) | ||
| case in => in | ||
| } | ||
| udf.copy(children = children) | ||
| } | ||
|
|
||
|
|
@@ -860,7 +840,7 @@ object TypeCoercion extends TypeCoercionBase { | |
| UnpivotCoercion :: | ||
| WidenSetOperationTypes :: | ||
| new CombinedTypeCoercionRule( | ||
| CollationTypeCasts :: | ||
| PreCollationTypeCasts :: | ||
| InConversion :: | ||
| PromoteStrings :: | ||
| DecimalPrecision :: | ||
|
|
@@ -877,7 +857,8 @@ object TypeCoercion extends TypeCoercionBase { | |
| ImplicitTypeCasts :: | ||
| DateTimeOperations :: | ||
| WindowFrameCoercion :: | ||
| StringLiteralCoercion :: Nil) :: Nil | ||
| StringLiteralCoercion :: | ||
| PostCollationTypeCasts :: Nil) :: Nil | ||
|
|
||
| override def canCast(from: DataType, to: DataType): Boolean = Cast.canCast(from, to) | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.