Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,12 @@ object DateTimeUtils {
* value. The return type is [[Option]] in order to distinguish between 0L and null. The following
* formats are allowed:
*
* `yyyy`
* `yyyy-[m]m`
* `yyyy-[m]m-[d]d`
* `yyyy-[m]m-[d]d `
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[+-]y*`
* `[+-]y*-[m]m`
* `[+-]y*-[m]m-[d]d`
* `[+-]y*-[m]m-[d]d `
* `[+-]y*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[+-]y*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
*
Expand All @@ -249,7 +249,7 @@ object DateTimeUtils {
* the input string can't be parsed as timestamp, the result timestamp segments are empty.
*/
private def parseTimestampString(s: UTF8String): (Array[Int], Option[ZoneId], Boolean) = {
if (s == null) {
if (s == null || s.trimAll().numBytes() == 0) {
return (Array.empty, None, false)
}
var tz: Option[String] = None
Expand All @@ -260,6 +260,11 @@ object DateTimeUtils {
var j = 0
var digitsMilli = 0
var justTime = false
var sign = 1
if (bytes(j) == '-' || bytes(j) == '+') {
sign = if (bytes(j) == '-') -1 else 1
j += 1
}
while (j < bytes.length) {
val b = bytes(j)
val parsedValue = b - '0'.toByte
Expand All @@ -269,10 +274,6 @@ object DateTimeUtils {
i += 3
} else if (i < 2) {
if (b == '-') {
if (i == 0 && j != 4) {
// year should have exact four digits
return (Array.empty, None, false)
}
segments(i) = currentSegmentValue
currentSegmentValue = 0
i += 1
Expand Down Expand Up @@ -339,10 +340,6 @@ object DateTimeUtils {
}

segments(i) = currentSegmentValue
if (!justTime && i == 0 && j != 4) {
// year should have exact four digits
return (Array.empty, None, false)
}

while (digitsMilli < 6) {
segments(6) *= 10
Expand All @@ -360,6 +357,7 @@ object DateTimeUtils {
case "-" => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8))
case zoneName: String => getZoneId(zoneName.trim)
}
segments(0) *= sign
(segments, zoneId, justTime)
}

Expand All @@ -368,12 +366,12 @@ object DateTimeUtils {
* value. The return type is [[Option]] in order to distinguish between 0L and null. The following
* formats are allowed:
*
* `yyyy`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should just say "Please refer to parseTimestampString for the allowed formats." here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

* `yyyy-[m]m`
* `yyyy-[m]m-[d]d`
* `yyyy-[m]m-[d]d `
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[+-]y*`
* `[+-]y*-[m]m`
* `[+-]y*-[m]m-[d]d`
* `[+-]y*-[m]m-[d]d `
* `[+-]y*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[+-]y*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
*
Expand Down Expand Up @@ -425,12 +423,12 @@ object DateTimeUtils {
* The return type is [[Option]] in order to distinguish between 0L and null. The following
* formats are allowed:
*
* `yyyy`
* `yyyy-[m]m`
* `yyyy-[m]m-[d]d`
* `yyyy-[m]m-[d]d `
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[+-]y*`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

* `[+-]y*-[m]m`
* `[+-]y*-[m]m-[d]d`
* `[+-]y*-[m]m-[d]d `
* `[+-]y*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
* `[+-]y*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
*
* where `zone_id` should have one of the forms:
* - Z - Zulu time zone UTC+0
Expand Down Expand Up @@ -518,29 +516,30 @@ object DateTimeUtils {
* The return type is [[Option]] in order to distinguish between 0 and null. The following
* formats are allowed:
*
* `yyyy`
* `yyyy-[m]m`
* `yyyy-[m]m-[d]d`
* `yyyy-[m]m-[d]d `
* `yyyy-[m]m-[d]d *`
* `yyyy-[m]m-[d]dT*`
* `[+-]y*`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

* `[+-]y*-[m]m`
* `[+-]y*-[m]m-[d]d`
* `[+-]y*-[m]m-[d]d `
* `[+-]y*-[m]m-[d]d *`
* `[+-]y*-[m]m-[d]dT*`
*/
def stringToDate(s: UTF8String): Option[Int] = {
if (s == null) {
if (s == null || s.trimAll().numBytes() == 0) {
return None
}
val segments: Array[Int] = Array[Int](1, 1, 1)
var sign = 1
var i = 0
var currentSegmentValue = 0
val bytes = s.trimAll().getBytes
var j = 0
if (bytes(j) == '-' || bytes(j) == '+') {
sign = if (bytes(j) == '-') -1 else 1
j += 1
}
while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) {
val b = bytes(j)
if (i < 2 && b == '-') {
if (i == 0 && j != 4) {
// year should have exact four digits
return None
}
segments(i) = currentSegmentValue
currentSegmentValue = 0
i += 1
Expand All @@ -554,17 +553,13 @@ object DateTimeUtils {
}
j += 1
}
if (i == 0 && j != 4) {
// year should have exact four digits
return None
}
if (i < 2 && j < bytes.length) {
// For the `yyyy` and `yyyy-[m]m` formats, entire input must be consumed.
return None
}
segments(i) = currentSegmentValue
try {
val localDate = LocalDate.of(segments(0), segments(1), segments(2))
val localDate = LocalDate.of(sign * segments(0), segments(1), segments(2))
Some(localDateToDays(localDate))
} catch {
case NonFatal(_) => None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
s"Cannot cast $str to TimestampType.")
}

checkCastWithParseError("123")
checkCastWithParseError("2015-03-18 123142")
checkCastWithParseError("2015-03-18T123123")
checkCastWithParseError("2015-03-18X")
Expand All @@ -392,8 +391,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
s"Cannot cast $str to DateType.")
}

checkCastWithParseError("12345")
checkCastWithParseError("12345-12-18")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's move the removed tests to the base cast suite

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

checkCastWithParseError("2015-13-18")
checkCastWithParseError("2015-03-128")
checkCastWithParseError("2015/03/18")
Expand All @@ -413,7 +410,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
test("SPARK-35720: cast invalid string input to timestamp without time zone") {
Seq("00:00:00",
"a",
"123",
"a2021-06-17",
"2021-06-17abc",
"2021-06-17 00:00:00ABC").foreach { invalidInput =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ class CastSuite extends CastSuiteBase {
checkEvaluation(
cast(cast(cast(cast(cast(cast("5", TimestampType, UTC_OPT), ByteType),
DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType),
null)
-128.toShort)
checkEvaluation(cast(cast(cast(cast(cast(cast("5", DecimalType.SYSTEM_DEFAULT),
ByteType), TimestampType), LongType), StringType), ShortType),
5.toShort)
Expand Down Expand Up @@ -569,7 +569,6 @@ class CastSuite extends CastSuiteBase {
test("SPARK-35720: cast invalid string input to timestamp without time zone") {
Seq("00:00:00",
"a",
"123",
"a2021-06-17",
"2021-06-17abc",
"2021-06-17 00:00:00ABC").foreach { invalidInput =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,11 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
// before epoch
checkHiveHashForDateType("1800-01-01", -62091)

// negative year
checkHiveHashForDateType("-1212-01-01", -1162202)

// Invalid input: bad date string. Hive returns 0 for such cases
intercept[NoSuchElementException](checkHiveHashForDateType("0-0-0", 0))
intercept[NoSuchElementException](checkHiveHashForDateType("-1212-01-01", 0))
intercept[NoSuchElementException](checkHiveHashForDateType("2016-99-99", 0))

// Invalid input: Empty string. Hive returns 0 for this case
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,26 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
assert(toDate("2015.03.18").isEmpty)
assert(toDate("20150318").isEmpty)
assert(toDate("2015-031-8").isEmpty)
assert(toDate("02015-03-18").isEmpty)
assert(toDate("015-03-18").isEmpty)
assert(toDate("015").isEmpty)
assert(toDate("02015").isEmpty)
assert(toDate("1999 08 01").isEmpty)
assert(toDate("1999-08 01").isEmpty)
assert(toDate("1999 08").isEmpty)
assert(toDate("").isEmpty)
assert(toDate(" ").isEmpty)
}

test("SPARK-35780: support full range of date string") {
  // Date strings whose year part has a sign, leading zeros, or a digit count
  // other than four, paired with the day count each one is expected to parse to.
  val accepted = Seq(
    "02015-03-18" -> days(2015, 3, 18),
    "015-03-18" -> days(15, 3, 18),
    "015" -> days(15, 1, 1),
    "02015" -> days(2015, 1, 1),
    "-02015" -> days(-2015, 1, 1),
    "999999-1-28" -> days(999999, 1, 28),
    "-999999-1-28" -> days(-999999, 1, 28),
    "1-1-28" -> days(1, 1, 28),
    "5881580-7-11" -> days(5881580, 7, 11),
    "-5877641-6-23" -> days(-5877641, 6, 23))
  accepted.foreach { case (input, expectedDays) =>
    assert(toDate(input).get === expectedDays)
  }

  // One day past the largest accepted date and one day before the smallest
  // accepted date must both be rejected.
  val rejected = Seq("5881580-7-12", "-5877641-6-22")
  rejected.foreach { input =>
    assert(toDate(input).isEmpty)
  }
}

private def toTimestamp(str: String, zoneId: ZoneId): Option[Long] = {
Expand Down Expand Up @@ -253,23 +266,21 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
expected = Option(date(2011, 5, 6, 7, 8, 9, 100000, zid = zid))
checkStringToTimestamp("2011-05-06 07:08:09.1000", expected)

checkStringToTimestamp("238", None)
checkStringToTimestamp("00238", None)
checkStringToTimestamp("2015-03-18 123142", None)
checkStringToTimestamp("2015-03-18T123123", None)
checkStringToTimestamp("2015-03-18X", None)
checkStringToTimestamp("2015/03/18", None)
checkStringToTimestamp("2015.03.18", None)
checkStringToTimestamp("20150318", None)
checkStringToTimestamp("2015-031-8", None)
checkStringToTimestamp("02015-01-18", None)
checkStringToTimestamp("015-01-18", None)
checkStringToTimestamp("2015-03-18T12:03.17-20:0", None)
checkStringToTimestamp("2015-03-18T12:03.17-0:70", None)
checkStringToTimestamp("2015-03-18T12:03.17-1:0:0", None)
checkStringToTimestamp("1999 08 01", None)
checkStringToTimestamp("1999-08 01", None)
checkStringToTimestamp("1999 08", None)
checkStringToTimestamp("", None)
checkStringToTimestamp(" ", None)

// Truncating the fractional seconds
expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = UTC))
Expand All @@ -283,6 +294,20 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
}
}

test("SPARK-35780: support full range of timestamp string") {
  // Timestamp strings whose year part has a sign, leading zeros, or a digit
  // count other than four, paired with the value expected when parsed in UTC.
  val cases = Seq(
    "-1969-12-31 16:00:00" -> Option(date(-1969, 12, 31, 16, zid = UTC)),
    "02015-03-18 16:00:00" -> Option(date(2015, 3, 18, 16, zid = UTC)),
    "015-03-18 16:00:00" -> Option(date(15, 3, 18, 16, zid = UTC)),
    "000001" -> Option(date(1, 1, 1, 0, zid = UTC)),
    "-000001" -> Option(date(-1, 1, 1, 0, zid = UTC)),
    "238" -> Option(date(238, 1, 1, 0, zid = UTC)),
    "00238" -> Option(date(238, 1, 1, 0, zid = UTC)))

  cases.foreach { case (input, expected) =>
    assert(toTimestamp(input, UTC) === expected)
  }
}

test("SPARK-15379: special invalid date string") {
// Test stringToDate
assert(toDate("2015-02-29 00:00:00").isEmpty)
Expand Down
26 changes: 26 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/datetime.sql
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,29 @@ select to_timestamp_ntz('2021-06-25 10:11:12') - interval '10-9' year to month;
select to_timestamp_ntz('2021-06-25 10:11:12') - interval '20 15' day to hour;
select to_timestamp_ntz('2021-06-25 10:11:12') - interval '20 15:40' day to minute;
select to_timestamp_ntz('2021-06-25 10:11:12') - interval '20 15:40:32.99899999' day to second;

-- datetime with year outside [0000-9999]
-- enable java8API for datetime because `java.sql.Date` only supports years from 0 to 9999
set spark.sql.datetime.java8API.enabled=true;
-- date literals with leading zeros, fewer than four year digits, and a negative year
select date'02015-03-18';
select date'015';
select date'-1-1-28';
-- Int.MaxValue and Int.MaxValue + 1 day
select cast('5881580-7-11' as date);
select cast('5881580-7-12' as date);
-- Int.MinValue and Int.MinValue - 1 day
select cast('-5877641-6-23' as date);
select cast('-5877641-6-22' as date);

-- timestamp literals with a negative year, leading zeros, and more than four year digits
select timestamp'-1969-12-31 16:00:00';
select timestamp'02015-03-18 16:00:00';
select timestamp'015-03-18 16:00:00';
select timestamp'-000001';
select timestamp'99999-03-18T12:03:17';
-- Long.MaxValue and Long.MaxValue + 1 microsecond
select cast('294247-01-10T04:00:54.775807Z' as timestamp);
select cast('294247-01-10T04:00:54.775808Z' as timestamp);
-- Long.MinValue and Long.MinValue - 1 microsecond
select cast('-290308-12-21T19:59:05.224192Z' as timestamp);
select cast('-290308-12-21T19:59:05.224191Z' as timestamp);
-- switch the Java 8 datetime API back off
set spark.sql.datetime.java8API.enabled=false;
Loading