@@ -74,7 +74,9 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateM

for (enrichErrorEnabled <- Seq(false, true)) {
test(s"cause exception - ${enrichErrorEnabled}") {
withSQLConf("spark.sql.connect.enrichError.enabled" -> enrichErrorEnabled.toString) {
withSQLConf(
"spark.sql.connect.enrichError.enabled" -> enrichErrorEnabled.toString,
"spark.sql.legacy.timeParserPolicy" -> "EXCEPTION") {
val ex = intercept[SparkUpgradeException] {
spark
.sql("""
2 changes: 2 additions & 0 deletions docs/sql-migration-guide.md
@@ -46,6 +46,8 @@ license: |
- Since Spark 4.0, MySQL JDBC datasource will read FLOAT as FloatType, while in Spark 3.5 and previous, it was read as DoubleType. To restore the previous behavior, you can cast the column to the old type.
- Since Spark 4.0, MySQL JDBC datasource will read BIT(n > 1) as BinaryType, while in Spark 3.5 and previous, it was read as LongType. To restore the previous behavior, set `spark.sql.legacy.mysql.bitArrayMapping.enabled` to `true`.
- Since Spark 4.0, MySQL JDBC datasource will write ShortType as SMALLINT, while in Spark 3.5 and previous, it was written as INTEGER. To restore the previous behavior, you can replace the column with IntegerType before writing.
- Since Spark 4.0, the default value for `spark.sql.legacy.ctePrecedencePolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an error, inner CTE definitions take precedence over outer definitions.
- Since Spark 4.0, the default value for `spark.sql.legacy.timeParserPolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an `INCONSISTENT_BEHAVIOR_CROSS_VERSION` error, `CANNOT_PARSE_TIMESTAMP` is raised if ANSI mode is enabled, and `NULL` is returned if ANSI mode is disabled. See [Datetime Patterns for Formatting and Parsing](sql-ref-datetime-pattern.html).
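
As a hedged illustration of the new default (a minimal sketch assuming a throwaway local SparkSession; the expected outcomes mirror the golden-file changes later in this diff):

```scala
import org.apache.spark.sql.SparkSession

// Illustration only: a local session purely for experimentation.
val spark = SparkSession.builder().master("local[*]").getOrCreate()

// Under the Spark 4.0 default (timeParserPolicy = CORRECTED), an unparsable
// input returns NULL when ANSI mode is disabled...
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT to_timestamp('1', 'yy')").show()  // -> NULL

// ...and raises CANNOT_PARSE_TIMESTAMP (a SparkDateTimeException) when enabled.
spark.conf.set("spark.sql.ansi.enabled", "true")
// spark.sql("SELECT to_timestamp('1', 'yy')").collect()  // throws

// The pre-4.0 behavior can be restored per session:
spark.conf.set("spark.sql.legacy.timeParserPolicy", "EXCEPTION")
```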

## Upgrading from Spark SQL 3.5.1 to 3.5.2

1 change: 1 addition & 0 deletions python/pyspark/sql/tests/connect/test_connect_session.py
@@ -109,6 +109,7 @@ def test_error_enrichment_jvm_stacktrace(self):
{
"spark.sql.connect.enrichError.enabled": True,
"spark.sql.pyspark.jvmStacktrace.enabled": False,
"spark.sql.legacy.timeParserPolicy": "EXCEPTION",
}
):
with self.sql_conf({"spark.sql.connect.serverStacktrace.enabled": False}):
@@ -79,6 +79,6 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf {
override def charVarcharAsString: Boolean = false
override def datetimeJava8ApiEnabled: Boolean = false
override def sessionLocalTimeZone: String = TimeZone.getDefault.getID
override def legacyTimeParserPolicy: LegacyBehaviorPolicy.Value = LegacyBehaviorPolicy.EXCEPTION
override def legacyTimeParserPolicy: LegacyBehaviorPolicy.Value = LegacyBehaviorPolicy.CORRECTED
override def defaultStringType: StringType = StringType
}
@@ -4016,13 +4016,13 @@ object SQLConf {
.doc("When LEGACY, java.text.SimpleDateFormat is used for formatting and parsing " +
"dates/timestamps in a locale-sensitive manner, which is the approach before Spark 3.0. " +
"When set to CORRECTED, classes from java.time.* packages are used for the same purpose. " +
"The default value is EXCEPTION, RuntimeException is thrown when we will get different " +
"results.")
"When set to EXCEPTION, RuntimeException is thrown when we will get different " +
"results. The default is CORRECTED.")
.version("3.0.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(LegacyBehaviorPolicy.values.map(_.toString))
.createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
.createWithDefault(LegacyBehaviorPolicy.CORRECTED.toString)

val LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC =
buildConf("spark.sql.legacy.followThreeValuedLogicInArrayExists")
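To make the `spark.sql.legacy.timeParserPolicy` doc string above concrete, here is a hedged sketch of how the three policy values play out for the same input (assumes an active session named `spark`; the printed outcomes are illustrative of the policy semantics, not exhaustive):

```scala
import scala.util.Try

// LEGACY    -> java.text.SimpleDateFormat semantics (pre-3.0, locale-sensitive)
// CORRECTED -> java.time.* parsers (the new default)
// EXCEPTION -> fail where the legacy and corrected parsers would disagree
Seq("LEGACY", "CORRECTED", "EXCEPTION").foreach { policy =>
  spark.conf.set("spark.sql.legacy.timeParserPolicy", policy)
  // A single-digit year under pattern 'yy' parses leniently with
  // SimpleDateFormat but is rejected by java.time:
  val outcome = Try(spark.sql("SELECT to_timestamp('1', 'yy')").collect().toSeq)
  println(s"$policy -> $outcome")
}
```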
@@ -185,7 +185,7 @@ class DateFormatterSuite extends DatetimeFormatterSuite {
val formatter = DateFormatter("MM-dd")
// The date parser in 2.4 accepts 1970-02-29 and turns it into 1970-03-01, so under the
// now-default CORRECTED policy we should get a DateTimeException here.
intercept[SparkUpgradeException](formatter.parse("02-29"))
intercept[DateTimeException](formatter.parse("02-29"))
}

test("SPARK-36418: default parsing w/o pattern") {
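The `intercept` change above captures the new failure mode: with CORRECTED as the default, the strict java.time parser rejects the impossible date directly instead of routing through the cross-version upgrade error. A hedged equivalent through the public SQL API (the suite itself exercises the internal DateFormatter; `spark` is an assumed active session):

```scala
// Pattern "MM-dd" resolves against year 1970, which has no February 29.
// CORRECTED, ANSI off     -> NULL
// CORRECTED, ANSI on      -> datetime parse error
// EXCEPTION (old default) -> SparkUpgradeException
spark.sql("SELECT to_date('02-29', 'MM-dd')").show()
```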
@@ -24,6 +24,7 @@ import org.scalatest.matchers.must.Matchers
import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException, SparkUpgradeException}
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{date, UTC}
import org.apache.spark.sql.internal.SQLConf

trait DatetimeFormatterSuite extends SparkFunSuite with SQLHelper with Matchers {
import DateTimeFormatterHelper._
@@ -99,34 +100,36 @@ trait DatetimeFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}

test("SPARK-31939: Fix Parsing day of year when year field pattern is missing") {
// resolved to queryable LocaleDate or fail directly
assertEqual("yyyy-dd-DD", "2020-29-60", date(2020, 2, 29))
assertError("yyyy-dd-DD", "2020-02-60",
"Field DayOfMonth 29 differs from DayOfMonth 2 derived from 2020-02-29")
assertEqual("yyyy-MM-DD", "2020-02-60", date(2020, 2, 29))
assertError("yyyy-MM-DD", "2020-03-60",
"Field MonthOfYear 2 differs from MonthOfYear 3 derived from 2020-02-29")
assertEqual("yyyy-MM-dd-DD", "2020-02-29-60", date(2020, 2, 29))
assertError("yyyy-MM-dd-DD", "2020-03-01-60",
"Field DayOfYear 61 differs from DayOfYear 60 derived from 2020-03-01")
assertEqual("yyyy-DDD", "2020-366", date(2020, 12, 31))
assertError("yyyy-DDD", "2019-366",
"Invalid date 'DayOfYear 366' as '2019' is not a leap year")
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "EXCEPTION") {
// resolved to a queryable LocalDate or fails directly
assertEqual("yyyy-dd-DD", "2020-29-60", date(2020, 2, 29))
assertError("yyyy-dd-DD", "2020-02-60",
"Field DayOfMonth 29 differs from DayOfMonth 2 derived from 2020-02-29")
assertEqual("yyyy-MM-DD", "2020-02-60", date(2020, 2, 29))
assertError("yyyy-MM-DD", "2020-03-60",
"Field MonthOfYear 2 differs from MonthOfYear 3 derived from 2020-02-29")
assertEqual("yyyy-MM-dd-DD", "2020-02-29-60", date(2020, 2, 29))
assertError("yyyy-MM-dd-DD", "2020-03-01-60",
"Field DayOfYear 61 differs from DayOfYear 60 derived from 2020-03-01")
assertEqual("yyyy-DDD", "2020-366", date(2020, 12, 31))
assertError("yyyy-DDD", "2019-366",
"Invalid date 'DayOfYear 366' as '2019' is not a leap year")

// unresolved and need to check manually(SPARK-31939 fixed)
assertEqual("DDD", "365", date(1970, 12, 31))
assertError("DDD", "366",
"Invalid date 'DayOfYear 366' as '1970' is not a leap year")
assertEqual("MM-DD", "03-60", date(1970, 3))
assertError("MM-DD", "02-60",
"Field MonthOfYear 2 differs from MonthOfYear 3 derived from 1970-03-01")
assertEqual("MM-dd-DD", "02-28-59", date(1970, 2, 28))
assertError("MM-dd-DD", "02-28-60",
"Field MonthOfYear 2 differs from MonthOfYear 3 derived from 1970-03-01")
assertError("MM-dd-DD", "02-28-58",
"Field DayOfMonth 28 differs from DayOfMonth 27 derived from 1970-02-27")
assertEqual("dd-DD", "28-59", date(1970, 2, 28))
assertError("dd-DD", "27-59",
"Field DayOfMonth 27 differs from DayOfMonth 28 derived from 1970-02-28")
// unresolved, so we need to check manually (SPARK-31939 fixed)
assertEqual("DDD", "365", date(1970, 12, 31))
assertError("DDD", "366",
"Invalid date 'DayOfYear 366' as '1970' is not a leap year")
assertEqual("MM-DD", "03-60", date(1970, 3))
assertError("MM-DD", "02-60",
"Field MonthOfYear 2 differs from MonthOfYear 3 derived from 1970-03-01")
assertEqual("MM-dd-DD", "02-28-59", date(1970, 2, 28))
assertError("MM-dd-DD", "02-28-60",
"Field MonthOfYear 2 differs from MonthOfYear 3 derived from 1970-03-01")
assertError("MM-dd-DD", "02-28-58",
"Field DayOfMonth 28 differs from DayOfMonth 27 derived from 1970-02-27")
assertEqual("dd-DD", "28-59", date(1970, 2, 28))
assertError("dd-DD", "27-59",
"Field DayOfMonth 27 differs from DayOfMonth 28 derived from 1970-02-28")
}
}
}
@@ -36,23 +36,25 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
override protected def useDateFormatter: Boolean = false

test("parsing timestamps using time zones") {
val localDate = "2018-12-02T10:11:12.001234"
val expectedMicros = Map(
"UTC" -> 1543745472001234L,
PST.getId -> 1543774272001234L,
CET.getId -> 1543741872001234L,
"Africa/Dakar" -> 1543745472001234L,
"America/Los_Angeles" -> 1543774272001234L,
"Asia/Urumqi" -> 1543723872001234L,
"Asia/Hong_Kong" -> 1543716672001234L,
"Europe/Brussels" -> 1543741872001234L)
outstandingTimezonesIds.foreach { zoneId =>
val formatter = TimestampFormatter(
"yyyy-MM-dd'T'HH:mm:ss.SSSSSS",
getZoneId(zoneId),
isParsing = true)
val microsSinceEpoch = formatter.parse(localDate)
assert(microsSinceEpoch === expectedMicros(zoneId))
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "EXCEPTION") {
val localDate = "2018-12-02T10:11:12.001234"
val expectedMicros = Map(
"UTC" -> 1543745472001234L,
PST.getId -> 1543774272001234L,
CET.getId -> 1543741872001234L,
"Africa/Dakar" -> 1543745472001234L,
"America/Los_Angeles" -> 1543774272001234L,
"Asia/Urumqi" -> 1543723872001234L,
"Asia/Hong_Kong" -> 1543716672001234L,
"Europe/Brussels" -> 1543741872001234L)
outstandingTimezonesIds.foreach { zoneId =>
val formatter = TimestampFormatter(
"yyyy-MM-dd'T'HH:mm:ss.SSSSSS",
getZoneId(zoneId),
isParsing = true)
val microsSinceEpoch = formatter.parse(localDate)
assert(microsSinceEpoch === expectedMicros(zoneId))
}
}
}

@@ -13,13 +13,13 @@ select to_timestamp('1', 'yy')
-- !query schema
struct<>
-- !query output
org.apache.spark.SparkUpgradeException
org.apache.spark.SparkDateTimeException
{
"errorClass" : "INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER",
"sqlState" : "42K0B",
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"config" : "\"spark.sql.legacy.timeParserPolicy\"",
"datetime" : "'1'"
"ansiConfig" : "\"spark.sql.ansi.enabled\"",
"message" : "Text '1' could not be parsed at index 0"
}
}

@@ -45,13 +45,13 @@ select to_timestamp('123', 'yy')
-- !query schema
struct<>
-- !query output
org.apache.spark.SparkUpgradeException
org.apache.spark.SparkDateTimeException
{
"errorClass" : "INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER",
"sqlState" : "42K0B",
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"config" : "\"spark.sql.legacy.timeParserPolicy\"",
"datetime" : "'123'"
"ansiConfig" : "\"spark.sql.ansi.enabled\"",
"message" : "Text '123' could not be parsed, unparsed text found at index 2"
}
}

@@ -61,13 +61,13 @@ select to_timestamp('1', 'yyy')
-- !query schema
struct<>
-- !query output
org.apache.spark.SparkUpgradeException
org.apache.spark.SparkDateTimeException
{
"errorClass" : "INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER",
"sqlState" : "42K0B",
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"config" : "\"spark.sql.legacy.timeParserPolicy\"",
"datetime" : "'1'"
"ansiConfig" : "\"spark.sql.ansi.enabled\"",
"message" : "Text '1' could not be parsed at index 0"
}
}

@@ -110,13 +110,13 @@ select to_timestamp('9', 'DD')
-- !query schema
struct<>
-- !query output
org.apache.spark.SparkUpgradeException
org.apache.spark.SparkDateTimeException
{
"errorClass" : "INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER",
"sqlState" : "42K0B",
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"config" : "\"spark.sql.legacy.timeParserPolicy\"",
"datetime" : "'9'"
"ansiConfig" : "\"spark.sql.ansi.enabled\"",
"message" : "Text '9' could not be parsed at index 0"
}
}

@@ -142,13 +142,13 @@ select to_timestamp('9', 'DDD')
-- !query schema
struct<>
-- !query output
org.apache.spark.SparkUpgradeException
org.apache.spark.SparkDateTimeException
{
"errorClass" : "INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER",
"sqlState" : "42K0B",
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"config" : "\"spark.sql.legacy.timeParserPolicy\"",
"datetime" : "'9'"
"ansiConfig" : "\"spark.sql.ansi.enabled\"",
"message" : "Text '9' could not be parsed at index 0"
}
}

@@ -158,13 +158,13 @@ select to_timestamp('99', 'DDD')
-- !query schema
struct<>
-- !query output
org.apache.spark.SparkUpgradeException
org.apache.spark.SparkDateTimeException
{
"errorClass" : "INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER",
"sqlState" : "42K0B",
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"config" : "\"spark.sql.legacy.timeParserPolicy\"",
"datetime" : "'99'"
"ansiConfig" : "\"spark.sql.ansi.enabled\"",
"message" : "Text '99' could not be parsed at index 0"
}
}

@@ -284,17 +284,9 @@ org.apache.spark.SparkDateTimeException
-- !query
select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD'))
-- !query schema
struct<>
struct<from_csv(2018-366):struct<date:date>>
-- !query output
org.apache.spark.SparkUpgradeException
{
"errorClass" : "INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER",
"sqlState" : "42K0B",
"messageParameters" : {
"config" : "\"spark.sql.legacy.timeParserPolicy\"",
"datetime" : "'2018-366'"
}
}
{"date":null}


-- !query