-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-26243][SQL] Use java.time API for parsing timestamps and dates from JSON #23196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 26 commits
fb10b91
a9b39ec
ff589f5
4646ded
1c838e0
142f301
606da21
f326042
4120228
6689747
e575162
a35d5bf
2a2085d
55f2eac
57600e2
07fcf46
6b6ea8a
244654b
015fdce
96529f5
07d6031
d761dee
24b1e3d
9a11515
4b01d05
0c7b96b
bbaff09
8af9df9
363482e
07e0bf8
c12da1f
60ab5b1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util | |
|
|
||
| import java.time._ | ||
| import java.time.format.DateTimeFormatterBuilder | ||
| import java.time.temporal.{ChronoField, TemporalQueries} | ||
| import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} | ||
| import java.util.{Locale, TimeZone} | ||
|
|
||
| import scala.util.Try | ||
|
|
@@ -28,31 +28,44 @@ import org.apache.commons.lang3.time.FastDateFormat | |
|
|
||
| import org.apache.spark.sql.internal.SQLConf | ||
|
|
||
| sealed trait DateTimeFormatter { | ||
| sealed trait TimestampFormatter { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why did we name it
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we have another trait:
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Eh, sorry I mean the file name @cloud-fan.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will fix it in #23329 |
||
| def parse(s: String): Long // returns microseconds since epoch | ||
| def format(us: Long): String | ||
| } | ||
|
|
||
| class Iso8601DateTimeFormatter( | ||
| trait FormatterUtils { | ||
| protected def zoneId: ZoneId | ||
| protected def buildFormatter( | ||
| pattern: String, | ||
| locale: Locale): java.time.format.DateTimeFormatter = { | ||
| new DateTimeFormatterBuilder() | ||
| .appendPattern(pattern) | ||
| .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) | ||
| .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) | ||
| .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) | ||
| .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) | ||
| .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) | ||
| .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why we need default values here?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's say ff you need to parse |
||
| .toFormatter(locale) | ||
| } | ||
| protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor): java.time.Instant = { | ||
| val localDateTime = LocalDateTime.from(temporalAccessor) | ||
| val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) | ||
| Instant.from(zonedDateTime) | ||
| } | ||
| } | ||
|
|
||
| class Iso8601TimestampFormatter( | ||
| pattern: String, | ||
| timeZone: TimeZone, | ||
| locale: Locale) extends DateTimeFormatter { | ||
| val formatter = new DateTimeFormatterBuilder() | ||
| .appendPattern(pattern) | ||
| .parseDefaulting(ChronoField.YEAR_OF_ERA, 1970) | ||
| .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) | ||
| .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) | ||
| .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) | ||
| .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) | ||
| .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) | ||
| .toFormatter(locale) | ||
| locale: Locale) extends TimestampFormatter with FormatterUtils { | ||
| val zoneId = timeZone.toZoneId | ||
| val formatter = buildFormatter(pattern, locale) | ||
|
|
||
| def toInstant(s: String): Instant = { | ||
| val temporalAccessor = formatter.parse(s) | ||
| if (temporalAccessor.query(TemporalQueries.offset()) == null) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry I'm not very familiar with this API. what does this condition mean?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. zone offset is unknown after parsing. For example, if you parse |
||
| val localDateTime = LocalDateTime.from(temporalAccessor) | ||
| val zonedDateTime = ZonedDateTime.of(localDateTime, timeZone.toZoneId) | ||
| Instant.from(zonedDateTime) | ||
| toInstantWithZoneId(temporalAccessor) | ||
| } else { | ||
| Instant.from(temporalAccessor) | ||
| } | ||
|
|
@@ -75,10 +88,10 @@ class Iso8601DateTimeFormatter( | |
| } | ||
| } | ||
|
|
||
| class LegacyDateTimeFormatter( | ||
| class LegacyTimestampFormatter( | ||
| pattern: String, | ||
| timeZone: TimeZone, | ||
| locale: Locale) extends DateTimeFormatter { | ||
| locale: Locale) extends TimestampFormatter { | ||
| val format = FastDateFormat.getInstance(pattern, timeZone, locale) | ||
|
|
||
| protected def toMillis(s: String): Long = format.parse(s).getTime | ||
|
|
@@ -90,21 +103,21 @@ class LegacyDateTimeFormatter( | |
| } | ||
| } | ||
|
|
||
| class LegacyFallbackDateTimeFormatter( | ||
| class LegacyFallbackTimestampFormatter( | ||
| pattern: String, | ||
| timeZone: TimeZone, | ||
| locale: Locale) extends LegacyDateTimeFormatter(pattern, timeZone, locale) { | ||
| locale: Locale) extends LegacyTimestampFormatter(pattern, timeZone, locale) { | ||
| override def toMillis(s: String): Long = { | ||
| Try {super.toMillis(s)}.getOrElse(DateTimeUtils.stringToTime(s).getTime) | ||
| } | ||
| } | ||
|
|
||
| object DateTimeFormatter { | ||
| def apply(format: String, timeZone: TimeZone, locale: Locale): DateTimeFormatter = { | ||
| object TimestampFormatter { | ||
| def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = { | ||
| if (SQLConf.get.legacyTimeParserEnabled) { | ||
| new LegacyFallbackDateTimeFormatter(format, timeZone, locale) | ||
| new LegacyFallbackTimestampFormatter(format, timeZone, locale) | ||
| } else { | ||
| new Iso8601DateTimeFormatter(format, timeZone, locale) | ||
| new Iso8601TimestampFormatter(format, timeZone, locale) | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -116,29 +129,32 @@ sealed trait DateFormatter { | |
|
|
||
| class Iso8601DateFormatter( | ||
| pattern: String, | ||
| timeZone: TimeZone, | ||
| locale: Locale) extends DateFormatter { | ||
| locale: Locale) extends DateFormatter with FormatterUtils { | ||
|
|
||
| val zoneId = ZoneId.of("UTC") | ||
|
|
||
| val formatter = buildFormatter(pattern, locale) | ||
|
|
||
| val dateTimeFormatter = new Iso8601DateTimeFormatter(pattern, timeZone, locale) | ||
| def toInstant(s: String): Instant = { | ||
| val temporalAccessor = formatter.parse(s) | ||
| toInstantWithZoneId(temporalAccessor) | ||
| } | ||
|
|
||
| override def parse(s: String): Int = { | ||
| val seconds = dateTimeFormatter.toInstant(s).getEpochSecond | ||
| val seconds = toInstant(s).getEpochSecond | ||
| val days = Math.floorDiv(seconds, DateTimeUtils.SECONDS_PER_DAY) | ||
|
|
||
| days.toInt | ||
| } | ||
|
|
||
| override def format(days: Int): String = { | ||
| val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) | ||
| dateTimeFormatter.formatter.withZone(timeZone.toZoneId).format(instant) | ||
| formatter.withZone(zoneId).format(instant) | ||
| } | ||
| } | ||
|
|
||
| class LegacyDateFormatter( | ||
| pattern: String, | ||
| timeZone: TimeZone, | ||
| locale: Locale) extends DateFormatter { | ||
| val format = FastDateFormat.getInstance(pattern, timeZone, locale) | ||
| class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter { | ||
| val format = FastDateFormat.getInstance(pattern, locale) | ||
|
|
||
| def parse(s: String): Int = { | ||
| val milliseconds = format.parse(s).getTime | ||
|
|
@@ -153,8 +169,7 @@ class LegacyDateFormatter( | |
|
|
||
| class LegacyFallbackDateFormatter( | ||
| pattern: String, | ||
| timeZone: TimeZone, | ||
| locale: Locale) extends LegacyDateFormatter(pattern, timeZone, locale) { | ||
| locale: Locale) extends LegacyDateFormatter(pattern, locale) { | ||
| override def parse(s: String): Int = { | ||
| Try(super.parse(s)).orElse { | ||
| // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards | ||
|
|
@@ -169,11 +184,11 @@ class LegacyFallbackDateFormatter( | |
| } | ||
|
|
||
| object DateFormatter { | ||
| def apply(format: String, timeZone: TimeZone, locale: Locale): DateFormatter = { | ||
| def apply(format: String, locale: Locale): DateFormatter = { | ||
| if (SQLConf.get.legacyTimeParserEnabled) { | ||
| new LegacyFallbackDateFormatter(format, timeZone, locale) | ||
| new LegacyFallbackDateFormatter(format, locale) | ||
| } else { | ||
| new Iso8601DateFormatter(format, timeZone, locale) | ||
| new Iso8601DateFormatter(format, locale) | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.