-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-30869][SQL] Convert dates to/from timestamps in microseconds precision #27618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7d812a3
f78d773
ac444aa
ffaccad
29caa4e
3c7ef73
9a4582e
15bedb3
17d9566
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,24 +59,22 @@ object DateTimeUtils { | |
| TimeZone.getTimeZone(getZoneId(timeZoneId)) | ||
| } | ||
|
|
||
| // we should use the exact day as Int, for example, (year, month, day) -> day | ||
| def millisToDays(millisUtc: Long): SQLDate = { | ||
| millisToDays(millisUtc, defaultTimeZone().toZoneId) | ||
| def microsToDays(timestamp: SQLTimestamp): SQLDate = { | ||
| microsToDays(timestamp, defaultTimeZone().toZoneId) | ||
| } | ||
|
|
||
| def millisToDays(millisUtc: Long, zoneId: ZoneId): SQLDate = { | ||
| val instant = microsToInstant(fromMillis(millisUtc)) | ||
| def microsToDays(timestamp: SQLTimestamp, zoneId: ZoneId): SQLDate = { | ||
| val instant = microsToInstant(timestamp) | ||
| localDateToDays(LocalDateTime.ofInstant(instant, zoneId).toLocalDate) | ||
| } | ||
|
|
||
| // reverse of microsToDays | ||
| def daysToMillis(days: SQLDate): Long = { | ||
| daysToMillis(days, defaultTimeZone().toZoneId) | ||
| def daysToMicros(days: SQLDate): SQLTimestamp = { | ||
| daysToMicros(days, defaultTimeZone().toZoneId) | ||
| } | ||
|
|
||
| def daysToMillis(days: SQLDate, zoneId: ZoneId): Long = { | ||
| def daysToMicros(days: SQLDate, zoneId: ZoneId): SQLTimestamp = { | ||
| val instant = daysToLocalDate(days).atStartOfDay(zoneId).toInstant | ||
| toMillis(instantToMicros(instant)) | ||
| instantToMicros(instant) | ||
| } | ||
|
|
||
| // Converts Timestamp to string according to Hive TimestampWritable convention. | ||
|
|
@@ -88,14 +86,14 @@ object DateTimeUtils { | |
| * Returns the number of days since epoch from java.sql.Date. | ||
| */ | ||
| def fromJavaDate(date: Date): SQLDate = { | ||
| millisToDays(date.getTime) | ||
| microsToDays(millisToMicros(date.getTime)) | ||
| } | ||
|
|
||
| /** | ||
| * Returns a java.sql.Date from number of days since epoch. | ||
| */ | ||
| def toJavaDate(daysSinceEpoch: SQLDate): Date = { | ||
| new Date(daysToMillis(daysSinceEpoch)) | ||
| new Date(microsToMillis(daysToMicros(daysSinceEpoch))) | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -138,7 +136,7 @@ object DateTimeUtils { | |
| * Converts the timestamp to milliseconds since epoch. In Spark, timestamp values have microsecond | ||
| * precision, so this conversion is lossy. | ||
| */ | ||
| def toMillis(us: SQLTimestamp): Long = { | ||
| def microsToMillis(us: SQLTimestamp): Long = { | ||
| // When the timestamp is negative, i.e. before 1970, we need to adjust the milliseconds portion. | ||
| // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. | ||
| // In millis precision the above needs to be represented as (-157700927877). | ||
|
|
@@ -148,7 +146,7 @@ object DateTimeUtils { | |
| /* | ||
| * Converts milliseconds since epoch to SQLTimestamp. | ||
| */ | ||
| def fromMillis(millis: Long): SQLTimestamp = { | ||
| def millisToMicros(millis: Long): SQLTimestamp = { | ||
| Math.multiplyExact(millis, MICROS_PER_MILLIS) | ||
| } | ||
|
|
||
|
|
@@ -574,10 +572,8 @@ object DateTimeUtils { | |
| time2: SQLTimestamp, | ||
| roundOff: Boolean, | ||
| zoneId: ZoneId): Double = { | ||
| val millis1 = toMillis(time1) | ||
| val millis2 = toMillis(time2) | ||
| val date1 = millisToDays(millis1, zoneId) | ||
| val date2 = millisToDays(millis2, zoneId) | ||
| val date1 = microsToDays(time1, zoneId) | ||
| val date2 = microsToDays(time2, zoneId) | ||
| val (year1, monthInYear1, dayInMonth1, daysToMonthEnd1) = splitDate(date1) | ||
| val (year2, monthInYear2, dayInMonth2, daysToMonthEnd2) = splitDate(date2) | ||
|
|
||
|
|
@@ -591,8 +587,8 @@ object DateTimeUtils { | |
| } | ||
| // using milliseconds can cause precision loss with more than 8 digits | ||
| // we follow Hive's implementation which uses seconds | ||
| val secondsInDay1 = MILLISECONDS.toSeconds(millis1 - daysToMillis(date1, zoneId)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. shall we call
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Highly likely, yes. I will prepare a separate fix.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The error of rounding is invisible in dividing by
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ah, this is "seconds in day", so it's always positive. |
||
| val secondsInDay2 = MILLISECONDS.toSeconds(millis2 - daysToMillis(date2, zoneId)) | ||
| val secondsInDay1 = MICROSECONDS.toSeconds(time1 - daysToMicros(date1, zoneId)) | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| val secondsInDay2 = MICROSECONDS.toSeconds(time2 - daysToMicros(date2, zoneId)) | ||
| val secondsDiff = (dayInMonth1 - dayInMonth2) * SECONDS_PER_DAY + secondsInDay1 - secondsInDay2 | ||
| val secondsInMonth = DAYS.toSeconds(31) | ||
| val diff = monthDiff + secondsDiff / secondsInMonth.toDouble | ||
|
|
@@ -711,21 +707,17 @@ object DateTimeUtils { | |
| def truncTimestamp(t: SQLTimestamp, level: Int, zoneId: ZoneId): SQLTimestamp = { | ||
| level match { | ||
| case TRUNC_TO_MICROSECOND => t | ||
| case TRUNC_TO_MILLISECOND => | ||
| t - Math.floorMod(t, MICROS_PER_MILLIS) | ||
| case TRUNC_TO_SECOND => | ||
| t - Math.floorMod(t, MICROS_PER_SECOND) | ||
| case TRUNC_TO_MINUTE => | ||
| t - Math.floorMod(t, MICROS_PER_MINUTE) | ||
| case TRUNC_TO_HOUR => truncToUnit(t, zoneId, ChronoUnit.HOURS) | ||
| case TRUNC_TO_DAY => truncToUnit(t, zoneId, ChronoUnit.DAYS) | ||
| case _ => | ||
| val millis = toMillis(t) | ||
| val truncated = level match { | ||
| case TRUNC_TO_MILLISECOND => millis | ||
| case TRUNC_TO_SECOND => | ||
| millis - Math.floorMod(millis, MILLIS_PER_SECOND) | ||
| case TRUNC_TO_MINUTE => | ||
| millis - Math.floorMod(millis, MILLIS_PER_MINUTE) | ||
| case _ => // Try to truncate date levels | ||
| val dDays = millisToDays(millis, zoneId) | ||
| daysToMillis(truncDate(dDays, level), zoneId) | ||
| } | ||
| fromMillis(truncated) | ||
| case _ => // Try to truncate date levels | ||
| val dDays = microsToDays(t, zoneId) | ||
| daysToMicros(truncDate(dDays, level), zoneId) | ||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.