From 46665cd448cf55c3e84c01aba3b270662ab9ca9a Mon Sep 17 00:00:00 2001
From: lipzhu
Date: Wed, 27 May 2020 19:31:18 +0800
Subject: [PATCH 1/3] Improve error message for incompatible data types

---
 docs/sql-ref-ansi-compliance.md                |  2 +-
 .../org/apache/spark/sql/types/DataType.scala  |  7 ++++---
 .../analysis/DataSourceV2AnalysisSuite.scala   | 10 +++++-----
 .../DataTypeWriteCompatibilitySuite.scala      | 18 +++++++++---------
 .../inputs/postgreSQL/window_part1.sql         |  2 +-
 .../inputs/postgreSQL/window_part3.sql         |  2 +-
 .../apache/spark/sql/sources/InsertSuite.scala | 16 ++++++++--------
 .../sql/test/DataFrameReaderWriterSuite.scala  | 10 +++++-----
 .../spark/sql/hive/client/VersionsSuite.scala  |  2 +-
 9 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index b62834ebe906..eab194c71ec7 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -95,7 +95,7 @@ CREATE TABLE t (v INT);
 -- `spark.sql.storeAssignmentPolicy=ANSI`
 INSERT INTO t VALUES ('1');
 org.apache.spark.sql.AnalysisException: Cannot write incompatible data to table '`default`.`t`':
-- Cannot safely cast 'v': StringType to IntegerType;
+- Cannot safely cast 'v': string to int;
 
 -- `spark.sql.storeAssignmentPolicy=LEGACY` (This is a legacy behaviour until Spark 2.x)
 INSERT INTO t VALUES ('1');
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index 7449a28e069d..183f6547d029 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -457,7 +457,7 @@ object DataType {
 
     case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == STRICT =>
       if (!Cast.canUpCast(w, r)) {
-        addError(s"Cannot safely cast '$context': $w to $r")
+        addError(s"Cannot safely cast '$context': ${w.simpleString} to ${r.simpleString}")
         false
       } else {
         true
@@ -467,7 +467,7 @@
 
     case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == ANSI =>
       if (!Cast.canANSIStoreAssign(w, r)) {
-        addError(s"Cannot safely cast '$context': $w to $r")
+        addError(s"Cannot safely cast '$context': ${w.simpleString} to ${r.simpleString}")
         false
       } else {
         true
@@ -477,7 +477,8 @@
       true
 
     case (w, r) =>
-      addError(s"Cannot write '$context': $w is incompatible with $r")
+      addError(s"Cannot write '$context': " +
+        s"${w.simpleString} is incompatible with ${r.simpleString}")
       false
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
index c01dea96fe2d..e466d558db1e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
@@ -21,7 +21,7 @@ import java.net.URI
 import java.util.Locale
 
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, AttributeReference, Cast, Expression, LessThanOrEqual, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, AttributeReference, Cast, LessThanOrEqual, Literal}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
@@ -143,7 +143,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
     assertNotResolved(parsedPlan)
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write", "'table-name'",
-      "Cannot safely cast", "'x'", "'y'", "DoubleType to FloatType"))
+      "Cannot safely cast", "'x'", "'y'", "double to float"))
   }
 
   test("byName: multiple field errors are reported") {
@@ -160,7 +160,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
     assertNotResolved(parsedPlan)
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write incompatible data to table", "'table-name'",
-      "Cannot safely cast", "'x'", "DoubleType to FloatType",
+      "Cannot safely cast", "'x'", "double to float",
       "Cannot write nullable values to non-null column", "'x'",
       "Cannot find data for output column", "'y'"))
   }
@@ -176,7 +176,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
     assertNotResolved(parsedPlan)
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write", "'table-name'",
-      "Cannot safely cast", "'x'", "'y'", "DoubleType to FloatType"))
+      "Cannot safely cast", "'x'", "'y'", "double to float"))
   }
 
   test("byPosition: multiple field errors are reported") {
@@ -194,7 +194,7 @@
 
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write incompatible data to table", "'table-name'",
       "Cannot write nullable values to non-null column", "'x'",
-      "Cannot safely cast", "'x'", "DoubleType to FloatType"))
+      "Cannot safely cast", "'x'", "double to float"))
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
index c47332f5d9fc..63c9960ad802 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
@@ -80,7 +80,7 @@ class StrictDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBa
   test("Check NullType is incompatible with all other types") {
     allNonNullTypes.foreach { t =>
       assertSingleError(NullType, t, "nulls", s"Should not allow writing None to type $t") { err =>
-        assert(err.contains(s"incompatible with $t"))
+        assert(err.contains(s"incompatible with ${t.simpleString}"))
       }
     }
   }
@@ -145,12 +145,12 @@ class ANSIDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBase
   test("Conversions between timestamp and long are not allowed") {
     assertSingleError(LongType, TimestampType, "longToTimestamp",
       "Should not allow long to timestamp") { err =>
-      assert(err.contains("Cannot safely cast 'longToTimestamp': LongType to TimestampType"))
+      assert(err.contains("Cannot safely cast 'longToTimestamp': bigint to timestamp"))
     }
 
     assertSingleError(TimestampType, LongType, "timestampToLong",
       "Should not allow timestamp to long") { err =>
-      assert(err.contains("Cannot safely cast 'timestampToLong': TimestampType to LongType"))
+      assert(err.contains("Cannot safely cast 'timestampToLong': timestamp to bigint"))
     }
   }
@@ -209,8 +209,8 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
       s"Should not allow writing $w to $r because cast is not safe") { err =>
       assert(err.contains("'t'"), "Should include the field name context")
       assert(err.contains("Cannot safely cast"), "Should identify unsafe cast")
-      assert(err.contains(s"$w"), "Should include write type")
-      assert(err.contains(s"$r"), "Should include read type")
+      assert(err.contains(s"${w.simpleString}"), "Should include write type")
+      assert(err.contains(s"${r.simpleString}"), "Should include read type")
     }
   }
 }
@@ -413,7 +413,7 @@
     assertNumErrors(writeType, readType, "top", "Should catch 14 errors", 14) { errs =>
       assert(errs(0).contains("'top.a.element'"), "Should identify bad type")
       assert(errs(0).contains("Cannot safely cast"))
-      assert(errs(0).contains("StringType to DoubleType"))
+      assert(errs(0).contains("string to double"))
 
       assert(errs(1).contains("'top.a'"), "Should identify bad type")
       assert(errs(1).contains("Cannot write nullable elements to array of non-nulls"))
@@ -430,11 +430,11 @@
       assert(errs(5).contains("'top.m.key'"), "Should identify bad type")
       assert(errs(5).contains("Cannot safely cast"))
-      assert(errs(5).contains("StringType to LongType"))
+      assert(errs(5).contains("string to bigint"))
 
       assert(errs(6).contains("'top.m.value'"), "Should identify bad type")
       assert(errs(6).contains("Cannot safely cast"))
-      assert(errs(6).contains("BooleanType to FloatType"))
+      assert(errs(6).contains("boolean to float"))
 
       assert(errs(7).contains("'top.m'"), "Should identify bad type")
       assert(errs(7).contains("Cannot write nullable values to map of non-nulls"))
@@ -452,7 +452,7 @@
       assert(errs(11).contains("'top.x'"), "Should identify bad type")
       assert(errs(11).contains("Cannot safely cast"))
-      assert(errs(11).contains("StringType to IntegerType"))
+      assert(errs(11).contains("string to int"))
 
       assert(errs(12).contains("'top'"), "Should identify bad type")
       assert(errs(12).contains("expected 'x', found 'y'"), "Should detect name mismatch")
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
index 087d7a5befd1..6e95aca7aff6 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
@@ -146,7 +146,7 @@ SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s
 -- mixture of agg/wfunc in the same window
 -- SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
 
--- Cannot safely cast 'enroll_date': StringType to DateType;
+-- Cannot safely cast 'enroll_date': string to date;
 -- SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM(
 -- SELECT *,
 -- CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus,
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
index cd3b74b3aa03..f4b8454da0d8 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
@@ -42,7 +42,7 @@ create table datetimes (
     f_timestamp timestamp
 ) using parquet;
 
--- Spark cannot safely cast StringType to TimestampType
+-- Spark cannot safely cast string to timestamp
 -- [SPARK-29636] Spark can't parse '11:00 BST' or '2000-10-19 10:23:54+01' signatures to timestamp
 insert into datetimes values
 (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), cast ('2000-10-19 10:23:54+01' as timestamp), timestamp '2000-10-19 10:23:54'),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 87a4d061b817..abd33ab8a8f2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -623,12 +623,12 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       var msg = intercept[AnalysisException] {
         sql("insert into t select 1L, 2")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'i': LongType to IntegerType"))
+      assert(msg.contains("Cannot safely cast 'i': bigint to int"))
 
       msg = intercept[AnalysisException] {
         sql("insert into t select 1, 2.0")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'd': DecimalType(2,1) to DoubleType"))
+      assert(msg.contains("Cannot safely cast 'd': decimal(2,1) to double"))
 
       msg = intercept[AnalysisException] {
         sql("insert into t select 1, 2.0D, 3")
@@ -660,18 +660,18 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       var msg = intercept[AnalysisException] {
         sql("insert into t values('a', 'b')")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'i': StringType to IntegerType") &&
-        msg.contains("Cannot safely cast 'd': StringType to DoubleType"))
+      assert(msg.contains("Cannot safely cast 'i': string to int") &&
+        msg.contains("Cannot safely cast 'd': string to double"))
 
       msg = intercept[AnalysisException] {
         sql("insert into t values(now(), now())")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'i': TimestampType to IntegerType") &&
-        msg.contains("Cannot safely cast 'd': TimestampType to DoubleType"))
+      assert(msg.contains("Cannot safely cast 'i': timestamp to int") &&
+        msg.contains("Cannot safely cast 'd': timestamp to double"))
 
       msg = intercept[AnalysisException] {
         sql("insert into t values(true, false)")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'i': BooleanType to IntegerType") &&
-        msg.contains("Cannot safely cast 'd': BooleanType to DoubleType"))
+      assert(msg.contains("Cannot safely cast 'i': boolean to int") &&
+        msg.contains("Cannot safely cast 'd': boolean to double"))
     }
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 9747840ce403..fe0a8439acc2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -333,7 +333,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
       var msg = intercept[AnalysisException] {
         Seq((1L, 2.0)).toDF("i", "d").write.mode("append").saveAsTable("t")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'i': LongType to IntegerType"))
+      assert(msg.contains("Cannot safely cast 'i': bigint to int"))
 
       // Insert into table successfully.
       Seq((1, 2.0)).toDF("i", "d").write.mode("append").saveAsTable("t")
@@ -354,14 +354,14 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
       var msg = intercept[AnalysisException] {
         Seq(("a", "b")).toDF("i", "d").write.mode("append").saveAsTable("t")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'i': StringType to IntegerType") &&
-        msg.contains("Cannot safely cast 'd': StringType to DoubleType"))
+      assert(msg.contains("Cannot safely cast 'i': string to int") &&
+        msg.contains("Cannot safely cast 'd': string to double"))
 
       msg = intercept[AnalysisException] {
         Seq((true, false)).toDF("i", "d").write.mode("append").saveAsTable("t")
       }.getMessage
-      assert(msg.contains("Cannot safely cast 'i': BooleanType to IntegerType") &&
-        msg.contains("Cannot safely cast 'd': BooleanType to DoubleType"))
+      assert(msg.contains("Cannot safely cast 'i': boolean to int") &&
+        msg.contains("Cannot safely cast 'd': boolean to double"))
     }
   }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index d1dd13623650..8642a5ff1681 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -982,7 +982,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
           """.stripMargin
       )
 
-      val errorMsg = "Cannot safely cast 'f0': DecimalType(2,1) to BinaryType"
+      val errorMsg = "Cannot safely cast 'f0': decimal(2,1) to binary"
 
       if (isPartitioned) {
         val insertStmt = s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1.3"

From 77c7eef56ba7edaa4fee9bc8f6b5ac471d0806d7 Mon Sep 17 00:00:00 2001
From: lipzhu
Date: Thu, 28 May 2020 22:26:01 +0800
Subject: [PATCH 2/3] use catalogString

---
 .../main/scala/org/apache/spark/sql/types/DataType.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index 183f6547d029..fe8d7efc9dc1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -457,7 +457,7 @@ object DataType {
 
     case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == STRICT =>
       if (!Cast.canUpCast(w, r)) {
-        addError(s"Cannot safely cast '$context': ${w.simpleString} to ${r.simpleString}")
+        addError(s"Cannot safely cast '$context': ${w.catalogString} to ${r.catalogString}")
         false
       } else {
         true
@@ -467,7 +467,7 @@
 
     case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == ANSI =>
       if (!Cast.canANSIStoreAssign(w, r)) {
-        addError(s"Cannot safely cast '$context': ${w.simpleString} to ${r.simpleString}")
+        addError(s"Cannot safely cast '$context': ${w.catalogString} to ${r.catalogString}")
         false
       } else {
         true
@@ -478,7 +478,7 @@
 
     case (w, r) =>
       addError(s"Cannot write '$context': " +
-        s"${w.simpleString} is incompatible with ${r.simpleString}")
+        s"${w.catalogString} is incompatible with ${r.catalogString}")
       false
   }
 }

From 9c4b485030dbb80d0d757ef8100984a53ff7eb2b Mon Sep 17 00:00:00 2001
From: lipzhu
Date: Tue, 2 Jun 2020 11:14:18 +0800
Subject: [PATCH 3/3] use catalogString

---
 .../spark/sql/types/DataTypeWriteCompatibilitySuite.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
index 63c9960ad802..1a262d646ca1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
@@ -80,7 +80,7 @@ class StrictDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBa
   test("Check NullType is incompatible with all other types") {
     allNonNullTypes.foreach { t =>
       assertSingleError(NullType, t, "nulls", s"Should not allow writing None to type $t") { err =>
-        assert(err.contains(s"incompatible with ${t.simpleString}"))
+        assert(err.contains(s"incompatible with ${t.catalogString}"))
       }
     }
   }
@@ -209,8 +209,8 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
       s"Should not allow writing $w to $r because cast is not safe") { err =>
       assert(err.contains("'t'"), "Should include the field name context")
       assert(err.contains("Cannot safely cast"), "Should identify unsafe cast")
-      assert(err.contains(s"${w.simpleString}"), "Should include write type")
-      assert(err.contains(s"${r.simpleString}"), "Should include read type")
+      assert(err.contains(s"${w.catalogString}"), "Should include write type")
+      assert(err.contains(s"${r.catalogString}"), "Should include read type")
     }
   }
 }
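
Note on the three renderings involved, with a minimal spark-shell sketch (Spark 3.x
assumed; the `wide` struct below is hypothetical, not part of the patch). The old
messages interpolated the DataType object directly, so its toString ("IntegerType",
"DecimalType(2,1)") leaked into user-facing errors. PATCH 1 switches to simpleString,
the SQL-facing name; PATCH 2/3 settle on catalogString, which equals simpleString for
atomic types but never truncates nested types:

import org.apache.spark.sql.types._

// Old rendering: interpolating the type uses toString.
println(IntegerType.toString)            // IntegerType
println(DecimalType(2, 1).toString)      // DecimalType(2,1)

// New rendering: the SQL-facing names used in the fixed messages.
println(IntegerType.catalogString)       // int
println(LongType.catalogString)          // bigint
println(DecimalType(2, 1).catalogString) // decimal(2,1)

// Where simpleString and catalogString differ: simpleString may truncate a
// wide struct (governed by spark.sql.debug.maxToStringFields), while
// catalogString always prints the full type. `wide` is a made-up 30-column struct.
val wide = StructType((1 to 30).map(i => StructField(s"c$i", IntegerType)))
println(wide.simpleString)   // struct<...>, fields past the limit elided
println(wide.catalogString)  // struct<c1:int,c2:int,...,c30:int> in full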
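
The end-to-end effect matches the docs hunk above; a hypothetical spark-shell session:

// Hypothetical session mirroring the docs/sql-ref-ansi-compliance.md example.
spark.conf.set("spark.sql.storeAssignmentPolicy", "ANSI")
spark.sql("CREATE TABLE t (v INT) USING parquet")
spark.sql("INSERT INTO t VALUES ('1')")
// Before: org.apache.spark.sql.AnalysisException: Cannot write incompatible data
//         to table '`default`.`t`': - Cannot safely cast 'v': StringType to IntegerType;
// After:  ... - Cannot safely cast 'v': string to int;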