Skip to content
Closed
Next Next commit
Optimize greg to jul days rebasing
  • Loading branch information
MaxGekk committed Mar 29, 2020
commit 839b0294bf3f54167009c63d22d6b5e82ca53be8
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
import java.time._
import java.time.temporal.{ChronoField, ChronoUnit, IsoFields}
import java.util
import java.util.{Calendar, Locale, TimeZone}
import java.util.concurrent.TimeUnit._

Expand Down Expand Up @@ -1062,6 +1063,11 @@ object DateTimeUtils {
Math.toIntExact(localDate.toEpochDay)
}

// Lookup tables used to rebase days from the Proleptic Gregorian calendar to the
// hybrid Julian + Gregorian calendar without building a `Calendar` instance.
//
// `gregToJulDay(i)` is the first epoch day (days since 1970-01-01) from which the
// offset `gregToJulDiff(i)` applies; that offset stays in effect until the next
// entry. The array must stay sorted in ascending order because it is looked up
// with a binary search.
//
// The first entry, -719162, is 0001-01-01 and the last one, -141427, is 1582-10-15
// (the Gregorian cutover), after which both calendars agree and the offset is 0.
// NOTE(review): the intermediate entries look like March 1 of the century years
// in which the two calendars drift apart by one more day - confirm against the
// reference implementation before editing any value.
private val gregToJulDay: Array[Int] = Array(
  -719162,
  -682944, -646420, -609896,
  -536847, -500323, -463799,
  -390750, -354226, -317702,
  -244653, -208129, -171605,
  -141427)

// Number of days to add to a Proleptic Gregorian day that falls in the range
// starting at the entry of `gregToJulDay` with the same index.
private val gregToJulDiff: Array[Int] = Array(
  -2,
  -1, 0, 1,
  2, 3, 4,
  5, 6, 7,
  8, 9, 10,
  0)

/**
* Rebasing days since the epoch to store the same number of days
* as by Spark 2.4 and earlier versions. Spark 3.0 switched to
Expand All @@ -1079,14 +1085,9 @@ object DateTimeUtils {
* @return The rebased number of days since the epoch in Julian calendar.
*/
def rebaseGregorianToJulianDays(days: Int): Int = {
  if (days >= gregToJulDay(0)) {
    // Fast path: find the range of `gregToJulDay` that contains `days` and apply
    // the pre-computed offset for that range.
    val found = util.Arrays.binarySearch(gregToJulDay, days)
    // On a miss `binarySearch` returns `-(insertionPoint) - 1`, so the range that
    // contains `days` starts at `insertionPoint - 1`, i.e. `-found - 2`. The guard
    // above guarantees `insertionPoint >= 1`, so the index is never negative.
    val rangeIndex = if (found >= 0) found else -found - 2
    days + gregToJulDiff(rangeIndex)
  } else {
    // Slow path for days before the first entry of the lookup table. Without this
    // branch the lookup would compute index -1 and fail with
    // ArrayIndexOutOfBoundsException, so such days are rebased via the calendar.
    val localDate = LocalDate.ofEpochDay(days)
    val utcCal = new Calendar.Builder()
      // `gregory` is a hybrid calendar that supports both
      // the Julian and Gregorian calendar systems
      .setCalendarType("gregory")
      .setTimeZone(TimeZoneUTC)
      .setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth)
      .build()
    Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, MILLIS_PER_DAY))
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ package org.apache.spark.sql.catalyst.util
import java.sql.{Date, Timestamp}
import java.text.SimpleDateFormat
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZoneId}
import java.util.{Locale, TimeZone}
import java.util.{Calendar, Locale, TimeZone}
import java.util.concurrent.TimeUnit

import org.scalatest.Matchers

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
Expand Down Expand Up @@ -765,4 +764,27 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
}
}
}

test("optimization of days rebasing - Gregorian to Julian") {
  // Reference implementation: converts the Proleptic Gregorian date to the same
  // year-month-day fields in the JDK's hybrid calendar and reads back the day number.
  def refRebaseGregorianToJulianDays(days: Int): Int = {
    val date = LocalDate.ofEpochDay(days)
    val hybridCalendar = new Calendar.Builder()
      // `gregory` is a hybrid calendar that supports both
      // the Julian and Gregorian calendar systems
      .setCalendarType("gregory")
      .setTimeZone(TimeZoneUTC)
      .setDate(date.getYear, date.getMonthValue - 1, date.getDayOfMonth)
      .build()
    Math.toIntExact(Math.floorDiv(hybridCalendar.getTimeInMillis, MILLIS_PER_DAY))
  }

  val firstDay = localDateToDays(LocalDate.of(1, 1, 1))
  val endDayExclusive = localDateToDays(LocalDate.of(2030, 1, 1))

  // Compare the optimized version with the reference one for every single day
  // in the range [0001-01-01, 2030-01-01).
  (firstDay until endDayExclusive).foreach { day =>
    assert(rebaseGregorianToJulianDays(day) === refRebaseGregorianToJulianDays(day))
  }
}
}