--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.types._
  * * Short: Short, Int, Long, Decimal, Float, Double
  * * Int: Int, Long, Decimal, Float, Double
  * * Long: Long, Decimal, Float, Double
- * * Decimal: Any wider Numeric type
+ * * Decimal: Float, Double, or any wider Numeric type
  * * Float: Float, Double
  * * Double: Double
  * * String: String
@@ -43,7 +43,7 @@ import org.apache.spark.sql.types._
  * * Boolean: Boolean
  * * Interval: Interval
  * As for complex data types, Spark will determine the precedence list recursively based on their
- * sub-types.
+ * sub-types and nullability.
  *
  * With the definition of the type precedence list, the general type coercion rules are as follows:
  * * Data type S is allowed to be implicitly cast as type T iff T is in the precedence list of S
@@ -67,8 +67,7 @@ import org.apache.spark.sql.types._
  * * MapConcat
  * * CreateMap
  * * For complex types (struct, array, map), Spark recursively looks into the element type and
- *   applies the rules above. If the element nullability is converted from true to false, add a
- *   runtime null check to the elements.
+ *   applies the rules above.
  * Note: this new type coercion system allows implicitly converting String type literals to other
  * primitive types, to avoid breaking too many existing Spark SQL queries. This is a special
  * rule and it is not from the ANSI SQL standard.
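
To make the precedence rules concrete, here is a minimal sketch, not part of this patch, of how they surface in ANSI mode; the local session setup and the query are assumptions for illustration only:

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical local session, for illustration only.
val spark = SparkSession.builder()
  .appName("coercion-demo")
  .master("local[1]")
  .getOrCreate()
spark.conf.set("spark.sql.ansi.enabled", "true")

// Int is in the precedence list of Short, so the SMALLINT operand widens to INT.
spark.sql("SELECT CAST(1 AS SMALLINT) + CAST(2 AS INT)").printSchema()

// String's precedence list contains only String, so non-literal values of
// other types are never implicitly cast to String (and vice versa).
```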
@@ -163,12 +162,14 @@ object AnsiTypeCoercion extends TypeCoercionBase {
 
     // This type coercion system allows implicitly converting String type literals to other
     // primitive types, to avoid breaking too many existing Spark SQL queries.
-    case (StringType, a: AtomicType) if isInputFoldable && a != BooleanType && a != StringType =>
+    case (StringType, a: AtomicType) if isInputFoldable =>
       Some(a)
 
     // If the target type is any Numeric type, convert the String type literal to Double type.
     case (StringType, NumericType) if isInputFoldable =>
       Some(DoubleType)
+
+    // If the target type is any Decimal type, convert the String type literal to the default Decimal type.
     case (StringType, DecimalType) if isInputFoldable =>
       Some(DecimalType.SYSTEM_DEFAULT)
 
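
A rough sketch of what these three cases mean for foldable string literals, reusing the ANSI-mode session from the earlier sketch; the specific functions are illustrative assumptions, not tests from this patch:

```scala
// Concrete atomic target type: date_add expects an INT for its second
// argument, so the foldable literal '1' is cast to INT directly.
spark.sql("SELECT date_add(DATE'2021-01-01', '1')")

// Abstract NumericType target: abs accepts any numeric type, so the
// string literal falls back to DOUBLE.
spark.sql("SELECT abs('-1.5')")

// Abstract DecimalType target: the literal falls back to the system
// default decimal, DecimalType.SYSTEM_DEFAULT, i.e. DECIMAL(38, 18).
```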
@@ -242,25 +243,11 @@ object AnsiTypeCoercion extends TypeCoercionBase {
     // Skip nodes whose children have not been resolved yet.
     case e if !e.childrenResolved => e
 
-    case a @ BinaryArithmetic(left @ StringType(), right)
-      if right.dataType != CalendarIntervalType && left.foldable =>
-      a.makeCopy(Array(Cast(left, DoubleType), right))
-    case a @ BinaryArithmetic(left, right @ StringType())
-      if left.dataType != CalendarIntervalType && right.foldable =>
-      a.makeCopy(Array(left, Cast(right, DoubleType)))
-
-    // For equality between string and timestamp we cast the string to a timestamp
-    // so that things like rounding of subsecond precision does not affect the comparison.
-    case p @ Equality(left @ StringType(), right @ TimestampType()) if left.foldable =>
-      p.makeCopy(Array(Cast(left, TimestampType), right))
-    case p @ Equality(left @ TimestampType(), right @ StringType()) if right.foldable =>
-      p.makeCopy(Array(left, Cast(right, TimestampType)))
-
-    case p @ BinaryComparison(left @ StringType(), right @ AtomicType()) if left.foldable =>
-      p.makeCopy(Array(castExpr(left, right.dataType), right))
-
-    case p @ BinaryComparison(left @ AtomicType(), right @ StringType()) if right.foldable =>
-      p.makeCopy(Array(left, castExpr(right, left.dataType)))
+    case b @ BinaryOperator(left @ StringType(), right @ AtomicType()) if left.foldable =>
+      b.makeCopy(Array(castExpr(left, right.dataType), right))
+
+    case b @ BinaryOperator(left @ AtomicType(), right @ StringType()) if right.foldable =>
+      b.makeCopy(Array(left, castExpr(right, left.dataType)))
 
     case Abs(e @ StringType()) if e.foldable => Abs(Cast(e, DoubleType))
     case m @ UnaryMinus(e @ StringType(), _) if e.foldable =>
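
The three special cases above (arithmetic, string/timestamp equality, comparison) collapse into one pair of `BinaryOperator` rules: a foldable string operand is now cast to the other operand's atomic type, instead of always going to double for arithmetic. A sketch of the observable difference, assuming the ANSI-mode session from above:

```scala
// Before this patch '5' was cast to DOUBLE; with the BinaryOperator rule it
// is cast to INT, the type of the non-string operand (see the updated tests).
spark.sql("SELECT '5' + 3")

// Equality with a timestamp still casts the string side to TIMESTAMP (now via
// castExpr), so sub-second rounding does not affect the comparison.
spark.sql("SELECT '2021-01-01 00:00:00.001' = TIMESTAMP'2021-01-01'")
```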
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -112,7 +112,7 @@ abstract class TypeCoercionBase {
    * system limitation, this rule will truncate the decimal type. If a decimal and other fractional
    * types are compared, returns a double type.
    */
-  def findWiderTypeForDecimal(dt1: DataType, dt2: DataType): Option[DataType] = {
+  protected def findWiderTypeForDecimal(dt1: DataType, dt2: DataType): Option[DataType] = {
     (dt1, dt2) match {
       case (t1: DecimalType, t2: DecimalType) =>
         Some(DecimalPrecision.widerDecimalType(t1, t2))
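
For reference, a small sketch of the widening arithmetic that `DecimalPrecision.widerDecimalType` performs; the visibility change above does not alter the behavior, and the concrete numbers are only an illustration:

```scala
import org.apache.spark.sql.catalyst.analysis.DecimalPrecision
import org.apache.spark.sql.types.DecimalType

// Integral range and scale are each widened to the max of the two inputs:
// range = max(10 - 2, 5 - 4) = 8, scale = max(2, 4) = 4 => DECIMAL(12, 4).
val wider = DecimalPrecision.widerDecimalType(DecimalType(10, 2), DecimalType(5, 4))
assert(wider == DecimalType(12, 4))
```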
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
@@ -234,12 +234,10 @@ class AnsiTypeCoercionSuite extends AnalysisTest {
   }
 
   test("implicit type cast - foldable StringType") {
-    val castableTypes =
-      numericTypes ++ datetimeTypes ++ Seq(BinaryType, StringType)
-    castableTypes.foreach { dt =>
+    atomicTypes.foreach { dt =>
       shouldCastStringLiteral(dt, dt)
     }
-    allTypes.filterNot(castableTypes.contains).foreach { dt =>
+    allTypes.filterNot(atomicTypes.contains).foreach { dt =>
       shouldNotCastStringLiteral(dt)
     }
     shouldCastStringLiteral(DecimalType, DecimalType.defaultConcreteType)
@@ -1390,9 +1388,9 @@ class AnsiTypeCoercionSuite extends AnalysisTest {
       EqualTo(castStringLiteralAsTimestamp, timestampLiteral))
 
     ruleTest(rule, Add(stringLiteral, Literal(1)),
-      Add(castStringLiteralAsDouble, Literal(1)))
+      Add(castStringLiteralAsInt, Literal(1)))
     ruleTest(rule, Divide(stringLiteral, Literal(1)),
-      Divide(castStringLiteralAsDouble, Literal(1)))
+      Divide(castStringLiteralAsInt, Literal(1)))
 
     ruleTest(rule,
       In(Literal(1), Seq(stringLiteral, Literal(2))),
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/with.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/with.sql
@@ -931,7 +931,7 @@ SELECT * FROM outermost ORDER BY 1;
 -- data-modifying WITH containing INSERT...ON CONFLICT DO UPDATE
 -- [ORIGINAL SQL]
 --CREATE TABLE withz AS SELECT i AS k, (i || ' v')::text v FROM generate_series(1, 16, 3) i;
-CREATE TABLE withz USING parquet AS SELECT i AS k, CAST(i AS string) || ' v' AS v FROM (SELECT EXPLODE(SEQUENCE(1, 16, 3)) i);
+CREATE TABLE withz USING parquet AS SELECT i AS k, CAST(i || ' v' AS string) v FROM (SELECT EXPLODE(SEQUENCE(1, 16, 3)) i);
 -- [NOTE] Spark SQL doesn't support UNIQUE constraints
 --ALTER TABLE withz ADD UNIQUE (k);
 
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out
@@ -223,32 +223,30 @@ Table or view not found: outermost; line 4 pos 23
 
 
 -- !query
-CREATE TABLE withz USING parquet AS SELECT i AS k, CAST(i AS string) || ' v' AS v FROM (SELECT EXPLODE(SEQUENCE(1, 16, 3)) i)
+CREATE TABLE withz USING parquet AS SELECT i AS k, CAST(i || ' v' AS string) v FROM (SELECT EXPLODE(SEQUENCE(1, 16, 3)) i)
Contributor: I see what's going on now. This test is for the SQL WITH feature, not string concat, and we lose test coverage because we failed to create the table and the following queries all fail. Let's change it to CAST(i AS string) || ' v'.

Member Author: Yeah, that was my intention.
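
For context, a minimal sketch of why the rewritten query fails (ANSI mode assumed, reusing the session from the sketches above): `||` resolves to `concat`, whose inputs must be string, binary, or array, and under ANSI type coercion an int operand is not implicitly cast to string:

```scala
// Fails to resolve: concat sees [int, string].
spark.sql("SELECT 1 || ' v'")

// Resolves: the explicit cast makes both inputs strings.
spark.sql("SELECT CAST(1 AS STRING) || ' v'")
```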

 -- !query schema
 struct<>
 -- !query output
-
+org.apache.spark.sql.AnalysisException
+cannot resolve 'concat(__auto_generated_subquery_name.`i`, ' v')' due to data type mismatch: input to function concat should have been string, binary or array, but it's [int, string]; line 1 pos 56
 
 
 -- !query
 SELECT * FROM withz ORDER BY k
 -- !query schema
-struct<k:int,v:string>
+struct<>
 -- !query output
-1 1 v
-4 4 v
-7 7 v
-10 10 v
-13 13 v
-16 16 v
+org.apache.spark.sql.AnalysisException
+Table or view not found: withz; line 1 pos 14
 
 
 -- !query
 DROP TABLE withz
 -- !query schema
 struct<>
 -- !query output
-
+org.apache.spark.sql.AnalysisException
+Table or view not found: withz; line 1 pos 11
 
 
 -- !query