Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[SPARK-29364][SQL] Return an interval from date subtract according to…
… SQL standard

Proposes a new expression `SubtractDates`, which is used to evaluate `date1` - `date2`. It has the `INTERVAL` type and returns the interval from `date2` (inclusive) to `date1` (exclusive). For example:
```sql
> select date'tomorrow' - date'yesterday';
interval 2 days
```

Closes #26034

- To conform to the SQL standard, which states that the result type of `date operand 1` - `date operand 2` must be the interval type. See [4.5.3 Operations involving datetimes and intervals](http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt).
- Improve Spark SQL UX and allow mixing date and timestamp in subtractions. For example: `select timestamp'now' + (date'2019-10-01' - date'2019-09-15')`

Before this change, the query below returns the number of days:
```sql
spark-sql> select date'2019-10-05' - date'2018-09-01';
399
```
After this change, it returns an interval:
```sql
spark-sql> select date'2019-10-05' - date'2018-09-01';
interval 1 years 1 months 4 days
```

- by new tests in `DateExpressionsSuite` and `TypeCoercionSuite`.
- by existing tests in `date.sql`

Closes #26112 from MaxGekk/date-subtract.

Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Yuming Wang <[email protected]>
  • Loading branch information
MaxGekk authored and xuanyuanking committed Dec 9, 2019
commit cf2bc65ec47cfc7db0d8077c41816b2825282461
Original file line number Diff line number Diff line change
Expand Up @@ -2145,3 +2145,25 @@ case class SubtractTimestamps(endTimestamp: Expression, startTimestamp: Expressi
s"new org.apache.spark.unsafe.types.CalendarInterval(0, 0, $end - $start)")
}
}

/**
 * Computes `left` - `right` for two dates and yields the result as a calendar interval.
 *
 * The `left` operand is handed to `DateTimeUtils.subtractDates` as the end date (exclusive)
 * and the `right` operand as the start date (inclusive), so the result is the interval from
 * the `right` date to the `left` date. It is negative when `left` precedes `right`.
 */
case class SubtractDates(left: Expression, right: Expression)
  extends BinaryExpression with ImplicitCastInputTypes {

  // Both operands are declared as dates; the result is always an interval.
  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)

  override def dataType: DataType = CalendarIntervalType

  // Interpreted path: both operands arrive as `Int` values and are forwarded unchanged.
  override def nullSafeEval(leftDays: Any, rightDays: Any): Any = {
    val endDate = leftDays.asInstanceOf[Int]
    val startDate = rightDays.asInstanceOf[Int]
    DateTimeUtils.subtractDates(endDate, startDate)
  }

  // Codegen path: emits a call to the same helper so that both paths share one implementation.
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    // Name of the `DateTimeUtils` object's class without the trailing `$`.
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    defineCodeGen(ctx, ev, (leftDays, rightDays) =>
      s"$dtu.subtractDates($leftDays, $rightDays)")
  }
}

Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import scala.util.control.NonFatal

import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.types.Decimal
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

/**
* Helper functions for converting between internal and external date and time representations.
Expand Down Expand Up @@ -939,4 +939,20 @@ object DateTimeUtils {
None
}
}

/**
 * Subtracts two dates and returns the calendar distance between them.
 *
 * Both arguments are converted with `LocalDate.ofEpochDay`, and the difference is taken with
 * `java.time.Period.between`, so the result is split into years, months and remaining days.
 * Years are folded into the interval's month field; the remaining days are stored in the
 * interval's day field.
 *
 * @param endDate - the end date, exclusive
 * @param startDate - the start date, inclusive
 * @return an interval between two dates. The interval can be negative
 *         if the end date is before the start date.
 */
def subtractDates(endDate: SQLDate, startDate: SQLDate): CalendarInterval = {
  val period = Period.between(
    LocalDate.ofEpochDay(startDate),
    LocalDate.ofEpochDay(endDate))
  val months = period.getMonths + 12 * period.getYears
  // Use the (months, days, microseconds) constructor, as other call sites do. Whole days
  // belong in the day field; a date difference never has a sub-day component.
  new CalendarInterval(months, period.getDays, 0)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}
import java.text.SimpleDateFormat
import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset}
import java.time.{Instant, LocalDate, LocalDateTime, ZoneId, ZoneOffset}
import java.util.{Calendar, Locale, TimeZone}
import java.util.concurrent.TimeUnit
import java.util.concurrent.TimeUnit._
Expand Down Expand Up @@ -1103,19 +1103,40 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

test("timestamps difference") {
  val end = Instant.parse("2019-10-04T11:04:01.123456Z")
  // A timestamp minus itself is the zero interval.
  checkEvaluation(SubtractTimestamps(Literal(end), Literal(end)),
    new CalendarInterval(0, 0, 0))
  // Later minus earlier: a positive interval.
  checkEvaluation(SubtractTimestamps(Literal(end), Literal(Instant.EPOCH)),
    IntervalUtils.stringToInterval(UTF8String.fromString("interval " +
      "436163 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds")))
  // Earlier minus later: the same magnitude with every field negated.
  checkEvaluation(SubtractTimestamps(Literal(Instant.EPOCH), Literal(end)),
    IntervalUtils.stringToInterval(UTF8String.fromString("interval " +
      "-436163 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds")))
  // A span from 0001-01-01 to the last microsecond of 9999-12-31.
  checkEvaluation(
    SubtractTimestamps(
      Literal(Instant.parse("9999-12-31T23:59:59.999999Z")),
      Literal(Instant.parse("0001-01-01T00:00:00Z"))),
    IntervalUtils.stringToInterval(UTF8String.fromString("interval " +
      "87649415 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds")))
}

test("subtract dates") {
  // Parses an interval string into the expected result value.
  def interval(s: String): CalendarInterval =
    IntervalUtils.stringToInterval(UTF8String.fromString(s))

  val end = LocalDate.of(2019, 10, 5)
  // A date minus itself is the zero interval (months, days, microseconds all zero),
  // built with the same three-argument constructor as the timestamp test.
  checkEvaluation(SubtractDates(Literal(end), Literal(end)),
    new CalendarInterval(0, 0, 0))
  // One day in either direction.
  checkEvaluation(SubtractDates(Literal(end.plusDays(1)), Literal(end)),
    interval("interval 1 days"))
  checkEvaluation(SubtractDates(Literal(end.minusDays(1)), Literal(end)),
    interval("interval -1 days"))
  // Distance to and from epoch day 0: swapping the operands negates every field.
  val epochDate = Literal(LocalDate.ofEpochDay(0))
  checkEvaluation(SubtractDates(Literal(end), epochDate),
    interval("interval 49 years 9 months 4 days"))
  checkEvaluation(SubtractDates(epochDate, Literal(end)),
    interval("interval -49 years -9 months -4 days"))
  // A span of exactly 9999 years, from 0001-01-01 to 10000-01-01.
  checkEvaluation(
    SubtractDates(
      Literal(LocalDate.of(10000, 1, 1)),
      Literal(LocalDate.of(1, 1, 1))),
    interval("interval 9999 years"))
}
}