40 changes: 30 additions & 10 deletions docs/sql-ref-ansi-compliance.md
@@ -221,7 +221,6 @@ This is a graphical depiction of the precedence list as a directed tree:
The least common type from a set of types is the narrowest type reachable from the precedence list by all elements of the set of types.

The least common type resolution is used to:
- Decide whether a function expecting a parameter of a type can be invoked using an argument of a narrower type.
- Derive the argument type for functions which expect a shared argument type for multiple parameters, such as coalesce, least, or greatest.
- Derive the operand types for operators such as arithmetic operations or comparisons.
- Derive the result type for expressions such as the case expression.
@@ -246,19 +245,40 @@ DOUBLE
> SELECT (typeof(coalesce(1BD, 1F)));
DOUBLE

-- The substring function expects arguments of type INT for the start and length parameters.
> SELECT substring('hello', 1Y, 2);
he
> SELECT substring('hello', '1', 2);
he
> SELECT substring('hello', 1L, 2);
Error: Argument 2 requires an INT type.
> SELECT substring('hello', str, 2) FROM VALUES(CAST('1' AS STRING)) AS T(str);
Error: Argument 2 requires an INT type.
```

### SQL Functions
#### Function invocation
Under ANSI mode (`spark.sql.ansi.enabled=true`), function invocation in Spark SQL:
- In general, follows the `Store assignment` rules: the input values are stored as the declared parameter types of the SQL functions.
- Applies special rules for string literals and untyped NULLs: a NULL can be promoted to any other type, while a string literal can be promoted to any simple data type.

```sql
> SET spark.sql.ansi.enabled=true;
-- implicitly cast Int to String type
> SELECT concat('total number: ', 1);
total number: 1
-- implicitly cast Timestamp to Date type
> SELECT datediff(now(), current_date);
0

-- special rule: implicitly cast String literal to Double type
> SELECT ceil('0.1');
1
-- special rule: implicitly cast NULL to Date type
> SELECT year(null);
NULL

> CREATE TABLE t(s string);
-- Can't store a String column as Numeric types.
> SELECT ceil(s) FROM t;
Error in query: cannot resolve 'CEIL(spark_catalog.default.t.s)' due to data type mismatch
-- Can't store a String column as Date type.
> SELECT year(s) FROM t;
Error in query: cannot resolve 'year(spark_catalog.default.t.s)' due to data type mismatch
```
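Because the invocation follows the store assignment rules, a numeric argument wider than the declared parameter type can also be accepted; the value is cast to the parameter type and checked under ANSI semantics at runtime. Below is a minimal sketch of that behavior for `substring`, whose start and length parameters are declared as INT; the result shown is inferred from the store assignment rule above rather than captured from a particular Spark build.

```sql
> SET spark.sql.ansi.enabled=true;
-- A BIGINT start index can be stored as the declared INT parameter type.
> SELECT substring('hello', 1L, 2);
he
```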

#### Functions with different behaviors
The behavior of some SQL functions can be different under ANSI mode (`spark.sql.ansi.enabled=true`).
- `size`: This function returns null for null input.
- `element_at`:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -159,6 +159,10 @@ object AnsiTypeCoercion extends TypeCoercionBase {
// If the expected type equals the input type, no need to cast.
case _ if expectedType.acceptsType(inType) => Some(inType)

// If input is a numeric type but not decimal, and we expect a decimal type,
// cast the input to decimal.
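// (DecimalType.forType picks a default precision and scale for the numeric type, for example
// DecimalType(10, 0) for IntegerType and DecimalType(20, 0) for LongType.)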
case (n: NumericType, DecimalType) => Some(DecimalType.forType(n))

// Cast null type (usually from null literals) into target types
// By default, the result type is `target.defaultConcreteType`. When the target type is
// `TypeCollection`, there is another branch to find the "closest convertible data type" below.
@@ -178,79 +182,17 @@ object AnsiTypeCoercion extends TypeCoercionBase {
case (StringType, DecimalType) if isInputFoldable =>
Some(DecimalType.SYSTEM_DEFAULT)

// If input is a numeric type but not decimal, and we expect a decimal type,
// cast the input to decimal.
case (d: NumericType, DecimalType) => Some(DecimalType.forType(d))

case (n1: NumericType, n2: NumericType) =>
val widerType = findWiderTypeForTwo(n1, n2)
widerType match {
// if the expected type is Float type, we should still return Float type.
case Some(DoubleType) if n1 != DoubleType && n2 == FloatType => Some(FloatType)

case Some(dt) if dt == n2 => Some(dt)

case _ => None
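// If the expected type is a concrete DataType, allow the implicit cast exactly when the
// ANSI store assignment rules allow storing a value of the input type as the target type.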
case (_, target: DataType) =>
if (Cast.canANSIStoreAssign(inType, target)) {
Some(target)
} else {
None
}

case (DateType, TimestampType) => Some(TimestampType)
case (DateType, AnyTimestampType) => Some(AnyTimestampType.defaultConcreteType)

// When we reach here, input type is not acceptable for any types in this type collection,
// first try to find all the expected types we can implicitly cast to:
// 1. if there are no convertible data types, return None;
// 2. if there is only one convertible data type, cast the input to it;
// 3. otherwise, if there are multiple convertible data types, find the closest convertible
// data type among them. If there is no such data type, return None.
// try to find the first one we can implicitly cast.
case (_, TypeCollection(types)) =>
// Since Spark contains special objects like `NumericType` and `DecimalType`, which accept
// multiple types and are `AbstractDataType` instead of `DataType`, here we use the
// conversion result as their representation.
val convertibleTypes = types.flatMap(implicitCast(inType, _, isInputFoldable))
if (convertibleTypes.isEmpty) {
None
} else {
// find the closest convertible data type, which can be implicitly cast to all other
// convertible types.
val closestConvertibleType = convertibleTypes.find { dt =>
convertibleTypes.forall { target =>
implicitCast(dt, target, isInputFoldable = false).isDefined
}
}
// If the closest convertible type is Float type and the convertible types contain Double
// type, simply return Double type as the closest convertible type to avoid potential
// precision loss when converting an integral type to Float type.
if (closestConvertibleType.contains(FloatType) && convertibleTypes.contains(DoubleType)) {
Some(DoubleType)
} else {
closestConvertibleType
}
}

// Implicit cast between array types.
//
// Compare the nullabilities of the from type and the to type, check whether the cast of
// the nullability is resolvable by the following rules:
// 1. If the nullability of the to type is true, the cast is always allowed;
// 2. If the nullabilities of both the from type and the to type are false, the cast is
// allowed.
// 3. Otherwise, the cast is not allowed
case (ArrayType(fromType, containsNullFrom), ArrayType(toType: DataType, containsNullTo))
if Cast.resolvableNullability(containsNullFrom, containsNullTo) =>
implicitCast(fromType, toType, isInputFoldable).map(ArrayType(_, containsNullTo))

// Implicit cast between Map types.
// Follows the same semantics of implicit casting between two array types.
// Refer to documentation above.
case (MapType(fromKeyType, fromValueType, fn), MapType(toKeyType, toValueType, tn))
if Cast.resolvableNullability(fn, tn) =>
val newKeyType = implicitCast(fromKeyType, toKeyType, isInputFoldable)
val newValueType = implicitCast(fromValueType, toValueType, isInputFoldable)
if (newKeyType.isDefined && newValueType.isDefined) {
Some(MapType(newKeyType.get, newValueType.get, tn))
} else {
None
}
types.flatMap(implicitCast(inType, _, isInputFoldable)).headOption

case _ => None
}
@@ -348,6 +290,9 @@ object AnsiTypeCoercion extends TypeCoercionBase {
// Skip nodes whose children have not been resolved yet.
case e if !e.childrenResolved => e

case d @ DateAdd(AnyTimestampType(), _) => d.copy(startDate = Cast(d.startDate, DateType))
[Review comment from the Member Author]: We should refactor these functions to extend `ImplicitCastInputTypes` later.

case d @ DateSub(AnyTimestampType(), _) => d.copy(startDate = Cast(d.startDate, DateType))

case s @ SubtractTimestamps(DateType(), AnyTimestampType(), _, _) =>
s.copy(left = Cast(s.left, s.right.dataType))
case s @ SubtractTimestamps(AnyTimestampType(), DateType(), _, _) =>
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -1157,9 +1157,9 @@ object TypeCoercion extends TypeCoercionBase {
override val transform: PartialFunction[Expression, Expression] = {
// Skip nodes whose children have not been resolved yet.
case e if !e.childrenResolved => e
case d @ DateAdd(TimestampType(), _) => d.copy(startDate = Cast(d.startDate, DateType))
case d @ DateAdd(AnyTimestampType(), _) => d.copy(startDate = Cast(d.startDate, DateType))
case d @ DateAdd(StringType(), _) => d.copy(startDate = Cast(d.startDate, DateType))
case d @ DateSub(TimestampType(), _) => d.copy(startDate = Cast(d.startDate, DateType))
case d @ DateSub(AnyTimestampType(), _) => d.copy(startDate = Cast(d.startDate, DateType))
case d @ DateSub(StringType(), _) => d.copy(startDate = Cast(d.startDate, DateType))

case s @ SubtractTimestamps(DateType(), AnyTimestampType(), _, _) =>
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
@@ -162,12 +162,13 @@ object RuleIdCollection {
// In the production code path, the following rules are run in CombinedTypeCoercionRule, and
// hence we only need to add them for unit testing.
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$PromoteStringLiterals" ::
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$DateTimeOperations" ::
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$GetDateFieldOperations" ::
"org.apache.spark.sql.catalyst.analysis.DecimalPrecision" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercion$BooleanEquality" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercion$DateTimeOperations" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$CaseWhenCoercion" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$ConcatCoercion" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$DateTimeOperations" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$Division" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$EltCoercion" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$FunctionArgumentConversion" ::