-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-35780][SQL] Support DATE/TIMESTAMP literals across the full range #32959
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
457d3b6
633781f
b250bc7
5b4fe62
e94885c
e9558cb
7e7a81a
e0ff811
d9f8af4
9a07c14
9e96020
b383571
dda6136
18d7766
e146045
4091859
4043889
8d69c88
cd330a6
4723f8e
538463a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -224,12 +224,12 @@ object DateTimeUtils { | |
| * value. The return type is [[Option]] in order to distinguish between 0L and null. The following | ||
| * formats are allowed: | ||
| * | ||
| * `yyyy` | ||
| * `yyyy-[m]m` | ||
| * `yyyy-[m]m-[d]d` | ||
| * `yyyy-[m]m-[d]d ` | ||
| * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[+-]y*` | ||
| * `[+-]y*-[m]m` | ||
| * `[+-]y*-[m]m-[d]d` | ||
| * `[+-]y*-[m]m-[d]d ` | ||
| * `[+-]y*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[+-]y*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * | ||
|
|
@@ -249,7 +249,7 @@ object DateTimeUtils { | |
| * the input string can't be parsed as timestamp, the result timestamp segments are empty. | ||
| */ | ||
| private def parseTimestampString(s: UTF8String): (Array[Int], Option[ZoneId], Boolean) = { | ||
| if (s == null) { | ||
| if (s == null || s.trimAll().numBytes() == 0) { | ||
| return (Array.empty, None, false) | ||
| } | ||
| var tz: Option[String] = None | ||
|
|
@@ -260,6 +260,11 @@ object DateTimeUtils { | |
| var j = 0 | ||
| var digitsMilli = 0 | ||
| var justTime = false | ||
| var sign = 1 | ||
| if (bytes(j) == '-' || bytes(j) == '+') { | ||
| sign = if (bytes(j) == '-') -1 else 1 | ||
| j += 1 | ||
| } | ||
| while (j < bytes.length) { | ||
| val b = bytes(j) | ||
| val parsedValue = b - '0'.toByte | ||
|
|
@@ -269,10 +274,6 @@ object DateTimeUtils { | |
| i += 3 | ||
| } else if (i < 2) { | ||
| if (b == '-') { | ||
| if (i == 0 && j != 4) { | ||
cloud-fan marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // year should have exact four digits | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return (Array.empty, None, false) | ||
| } | ||
| segments(i) = currentSegmentValue | ||
| currentSegmentValue = 0 | ||
| i += 1 | ||
|
|
@@ -339,10 +340,6 @@ object DateTimeUtils { | |
| } | ||
|
|
||
| segments(i) = currentSegmentValue | ||
| if (!justTime && i == 0 && j != 4) { | ||
| // year should have exact four digits | ||
| return (Array.empty, None, false) | ||
| } | ||
|
|
||
| while (digitsMilli < 6) { | ||
| segments(6) *= 10 | ||
|
|
@@ -360,6 +357,7 @@ object DateTimeUtils { | |
| case "-" => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8)) | ||
| case zoneName: String => getZoneId(zoneName.trim) | ||
| } | ||
| segments(0) *= sign | ||
| (segments, zoneId, justTime) | ||
| } | ||
|
|
||
|
|
@@ -368,12 +366,12 @@ object DateTimeUtils { | |
| * value. The return type is [[Option]] in order to distinguish between 0L and null. The following | ||
| * formats are allowed: | ||
| * | ||
| * `yyyy` | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we should just say "Please refer to parseTimestampString for the allowed formats." here
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
| * `yyyy-[m]m` | ||
| * `yyyy-[m]m-[d]d` | ||
| * `yyyy-[m]m-[d]d ` | ||
| * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[+-]y*` | ||
| * `[+-]y*-[m]m` | ||
| * `[+-]y*-[m]m-[d]d` | ||
| * `[+-]y*-[m]m-[d]d ` | ||
| * `[+-]y*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[+-]y*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * | ||
|
|
@@ -425,12 +423,12 @@ object DateTimeUtils { | |
| * The return type is [[Option]] in order to distinguish between 0L and null. The following | ||
| * formats are allowed: | ||
| * | ||
| * `yyyy` | ||
| * `yyyy-[m]m` | ||
| * `yyyy-[m]m-[d]d` | ||
| * `yyyy-[m]m-[d]d ` | ||
| * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[+-]y*` | ||
|
||
| * `[+-]y*-[m]m` | ||
| * `[+-]y*-[m]m-[d]d` | ||
| * `[+-]y*-[m]m-[d]d ` | ||
| * `[+-]y*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * `[+-]y*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` | ||
| * | ||
| * where `zone_id` should have one of the forms: | ||
| * - Z - Zulu time zone UTC+0 | ||
|
|
@@ -518,29 +516,30 @@ object DateTimeUtils { | |
| * The return type is [[Option]] in order to distinguish between 0 and null. The following | ||
| * formats are allowed: | ||
| * | ||
| * `yyyy` | ||
| * `yyyy-[m]m` | ||
| * `yyyy-[m]m-[d]d` | ||
| * `yyyy-[m]m-[d]d ` | ||
| * `yyyy-[m]m-[d]d *` | ||
| * `yyyy-[m]m-[d]dT*` | ||
| * `[+-]y*` | ||
|
||
| * `[+-]y*-[m]m` | ||
| * `[+-]y*-[m]m-[d]d` | ||
| * `[+-]y*-[m]m-[d]d ` | ||
| * `[+-]y*-[m]m-[d]d *` | ||
| * `[+-]y*-[m]m-[d]dT*` | ||
| */ | ||
| def stringToDate(s: UTF8String): Option[Int] = { | ||
| if (s == null) { | ||
| if (s == null || s.trimAll().numBytes() == 0) { | ||
| return None | ||
| } | ||
| val segments: Array[Int] = Array[Int](1, 1, 1) | ||
| var sign = 1 | ||
| var i = 0 | ||
| var currentSegmentValue = 0 | ||
| val bytes = s.trimAll().getBytes | ||
| var j = 0 | ||
| if (bytes(j) == '-' || bytes(j) == '+') { | ||
| sign = if (bytes(j) == '-') -1 else 1 | ||
| j += 1 | ||
| } | ||
| while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) { | ||
| val b = bytes(j) | ||
| if (i < 2 && b == '-') { | ||
| if (i == 0 && j != 4) { | ||
| // year should have exact four digits | ||
| return None | ||
| } | ||
| segments(i) = currentSegmentValue | ||
| currentSegmentValue = 0 | ||
| i += 1 | ||
|
|
@@ -554,17 +553,13 @@ object DateTimeUtils { | |
| } | ||
| j += 1 | ||
| } | ||
| if (i == 0 && j != 4) { | ||
| // year should have exact four digits | ||
| return None | ||
| } | ||
| if (i < 2 && j < bytes.length) { | ||
| // For the `yyyy` and `yyyy-[m]m` formats, entire input must be consumed. | ||
| return None | ||
| } | ||
| segments(i) = currentSegmentValue | ||
| try { | ||
| val localDate = LocalDate.of(segments(0), segments(1), segments(2)) | ||
| val localDate = LocalDate.of(sign * segments(0), segments(1), segments(2)) | ||
| Some(localDateToDays(localDate)) | ||
| } catch { | ||
| case NonFatal(_) => None | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -371,7 +371,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { | |
| s"Cannot cast $str to TimestampType.") | ||
| } | ||
|
|
||
| checkCastWithParseError("123") | ||
| checkCastWithParseError("2015-03-18 123142") | ||
| checkCastWithParseError("2015-03-18T123123") | ||
| checkCastWithParseError("2015-03-18X") | ||
|
|
@@ -392,8 +391,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { | |
| s"Cannot cast $str to DateType.") | ||
| } | ||
|
|
||
| checkCastWithParseError("12345") | ||
| checkCastWithParseError("12345-12-18") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. let's move the removed tests to the base cast suite
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
| checkCastWithParseError("2015-13-18") | ||
| checkCastWithParseError("2015-03-128") | ||
| checkCastWithParseError("2015/03/18") | ||
|
|
@@ -413,7 +410,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { | |
| test("SPARK-35720: cast invalid string input to timestamp without time zone") { | ||
| Seq("00:00:00", | ||
| "a", | ||
| "123", | ||
| "a2021-06-17", | ||
| "2021-06-17abc", | ||
| "2021-06-17 00:00:00ABC").foreach { invalidInput => | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.