Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sql-migration-guide-upgrade.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ license: |

- Since Spark 3.0, substitution order of nested WITH clauses is changed and an inner CTE definition takes precedence over an outer. In version 2.4 and earlier, `WITH t AS (SELECT 1), t2 AS (WITH t AS (SELECT 2) SELECT * FROM t) SELECT * FROM t2` returns `1` while in version 3.0 it returns `2`. The previous behaviour can be restored by setting `spark.sql.legacy.ctePrecedence.enabled` to `true`.

- Since Spark 3.0, the `add_months` function does not adjust the resulting date to a last day of month if the original date is a last day of month. The resulting date is adjust to a last day of month only if it is invalid. For example, `select add_months(DATE'2019-02-28', 1)` produces `2019-03-28` but `select add_months(DATE'2019-01-31', 1)` produces `2019-02-28`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is adjust -> is adjusted.
So previously, adding a month to 2019-02-28 resulted in 2019-03-31? It might be worth clarifying that too in your example.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So previously, adding a month to 2019-02-28 resulted in 2019-03-31?

Yes, it does:

scala> spark.sql("select add_months(DATE'2019-02-28', 1)").show
+--------------------------------+
|add_months(DATE '2019-02-28', 1)|
+--------------------------------+
|                      2019-03-31|
+--------------------------------+

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated the SQL migration guide, and add the example.


## Upgrading from Spark SQL 2.4 to 2.4.1

- The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,60 +505,12 @@ object DateTimeUtils {
LocalDate.ofEpochDay(date).getDayOfMonth
}

/**
* The number of days for each month (not leap year)
*/
private val monthDays = Array(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

/**
* Returns the date value for the first day of the given month.
* The month is expressed in months since year zero (17999 BC), starting from 0.
*/
private def firstDayOfMonth(absoluteMonth: Int): SQLDate = {
val absoluteYear = absoluteMonth / 12
var monthInYear = absoluteMonth - absoluteYear * 12
var date = getDateFromYear(absoluteYear)
if (monthInYear >= 2 && isLeap(absoluteYear + YearZero)) {
date += 1
}
while (monthInYear > 0) {
date += monthDays(monthInYear - 1)
monthInYear -= 1
}
date
}

/**
* Returns the date value for January 1 of the given year.
* The year is expressed in years since year zero (17999 BC), starting from 0.
*/
private def getDateFromYear(absoluteYear: Int): SQLDate = {
val absoluteDays = (absoluteYear * 365 + absoluteYear / 400 - absoluteYear / 100
+ absoluteYear / 4)
absoluteDays - toYearZero
}

/**
* Add date and year-month interval.
* Returns a date value, expressed in days since 1.1.1970.
*/
def dateAddMonths(days: SQLDate, months: Int): SQLDate = {
val (year, monthInYear, dayOfMonth, daysToMonthEnd) = splitDate(days)
val absoluteMonth = (year - YearZero) * 12 + monthInYear - 1 + months
val nonNegativeMonth = if (absoluteMonth >= 0) absoluteMonth else 0
val currentMonthInYear = nonNegativeMonth % 12
val currentYear = nonNegativeMonth / 12

val leapDay = if (currentMonthInYear == 1 && isLeap(currentYear + YearZero)) 1 else 0
val lastDayOfMonth = monthDays(currentMonthInYear) + leapDay

val currentDayInMonth = if (daysToMonthEnd == 0 || dayOfMonth >= lastDayOfMonth) {
// last day of the month
lastDayOfMonth
} else {
dayOfMonth
}
firstDayOfMonth(nonNegativeMonth) + currentDayInMonth - 1
LocalDate.ofEpochDay(days).plusMonths(months).toEpochDay.toInt
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(AddMonths(Literal.create(null, DateType), Literal.create(null, IntegerType)),
null)
checkEvaluation(
AddMonths(Literal(Date.valueOf("2015-01-30")), Literal(Int.MinValue)), -7293498)
AddMonths(Literal(Date.valueOf("2015-01-30")), Literal(Int.MinValue)), -938165455)
checkEvaluation(
AddMonths(Literal(Date.valueOf("2016-02-28")), positiveIntLit), 1014213)
checkEvaluation(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,18 +359,18 @@ class DateTimeUtilsSuite extends SparkFunSuite {

test("date add months") {
val input = days(1997, 2, 28, 10, 30)
assert(dateAddMonths(input, 36) === days(2000, 2, 29))
assert(dateAddMonths(input, -13) === days(1996, 1, 31))
assert(dateAddMonths(input, 36) === days(2000, 2, 28))
assert(dateAddMonths(input, -13) === days(1996, 1, 28))
}

test("timestamp add months") {
val ts1 = date(1997, 2, 28, 10, 30, 0)
val ts2 = date(2000, 2, 29, 10, 30, 0, 123000)
val ts2 = date(2000, 2, 28, 10, 30, 0, 123000)
assert(timestampAddInterval(ts1, 36, 123000, defaultTz) === ts2)

val ts3 = date(1997, 2, 27, 16, 0, 0, 0, TimeZonePST)
val ts4 = date(2000, 2, 27, 16, 0, 0, 123000, TimeZonePST)
val ts5 = date(2000, 2, 29, 0, 0, 0, 123000, TimeZoneGMT)
val ts5 = date(2000, 2, 28, 0, 0, 0, 123000, TimeZoneGMT)
assert(timestampAddInterval(ts3, 36, 123000, TimeZonePST) === ts4)
assert(timestampAddInterval(ts3, 36, 123000, TimeZoneGMT) === ts5)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,11 +301,11 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
val df = Seq((1, t1, d1), (3, t2, d2)).toDF("n", "t", "d")
checkAnswer(
df.selectExpr(s"d - $i"),
Seq(Row(Date.valueOf("2015-07-30")), Row(Date.valueOf("2015-12-30"))))
Seq(Row(Date.valueOf("2015-07-29")), Row(Date.valueOf("2015-12-28"))))
checkAnswer(
df.selectExpr(s"t - $i"),
Seq(Row(Timestamp.valueOf("2015-07-31 23:59:59")),
Row(Timestamp.valueOf("2015-12-31 00:00:00"))))
Row(Timestamp.valueOf("2015-12-29 00:00:00"))))
}

test("function add_months") {
Expand All @@ -314,10 +314,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
val df = Seq((1, d1), (2, d2)).toDF("n", "d")
checkAnswer(
df.select(add_months(col("d"), 1)),
Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2015-03-31"))))
Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2015-03-28"))))
checkAnswer(
df.selectExpr("add_months(d, -1)"),
Seq(Row(Date.valueOf("2015-07-31")), Row(Date.valueOf("2015-01-31"))))
Seq(Row(Date.valueOf("2015-07-31")), Row(Date.valueOf("2015-01-28"))))
}

test("function months_between") {
Expand Down