# [SPARK-31755][SQL] allow missing year/hour when parsing date/timestamp string #28576
## Changes from all commits
```diff
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util
 import java.time._
 import java.time.chrono.IsoChronology
-import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, DateTimeParseException, ResolverStyle}
+import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle}
 import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
 import java.util.Locale
```
```diff
@@ -31,17 +31,52 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._

 trait DateTimeFormatterHelper {
+  private def getOrDefault(accessor: TemporalAccessor, field: ChronoField, default: Int): Int = {
+    if (accessor.isSupported(field)) {
+      accessor.get(field)
+    } else {
+      default
+    }
+  }
+
+  protected def toLocalDate(accessor: TemporalAccessor): LocalDate = {
+    val localDate = accessor.query(TemporalQueries.localDate())
+    // If all the date fields are specified, return the local date directly.
+    if (localDate != null) return localDate
+
+    // Users may want to parse only a few datetime fields from a string and extract these fields
+    // later, and we should provide default values for missing fields.
+    // To be compatible with Spark 2.4, we pick 1970 as the default value of year.
+    val year = getOrDefault(accessor, ChronoField.YEAR, 1970)
+    val month = getOrDefault(accessor, ChronoField.MONTH_OF_YEAR, 1)
```
|
**Reviewer:** Are we also going to error out if they specify the day but not the month? Really, the only formats that make sense are the ones where a full prefix of the y-m-d h-m-s sequence is given, and all others are likely to be a case where the user made a mistake (e.g. asked for `mm` twice where they meant `MM`).

**Author:** January should be fine? It accepts any DAY_OF_MONTH.

**Author:** This will error out unless the two `mm` parse to the same value.

**Reviewer:** I'm saying it works (i.e., it accepts all valid values), but it's nonsensical; it'll never return the complete timestamp that people intended. That said, I can see cases where people want to parse contiguous ranges (e.g. just a time, or just a month, or just a month + day) and then extract the parsed components. So I guess the most permissive but still safe thing here would be to forbid a missing "year" only if someone also requests "day" and "month", because that's the situation where "year" affects the valid range of "day". But if they don't request year and they don't request month, then month would default to 1 and there would be no impact on the valid range of days; it would always go up to 31.

**Author:** ah, this looks better! I'm wondering if we still need the legacy config, though. It's very hard to hit the missing-year exception, and if it happens it's very likely to be a data error. Users should fix the data, or change the pattern and provide the default year.

**Author:** ok, so we should go back to the first version, which just uses 1970 as the default value and fails for invalid dates at runtime 😂

**Author:** BTW, the current framework just returns null if the parser fails. Shall we treat this case specially and throw an error that fails the query?

**Reviewer:** Error out in ANSI mode only, I'd say, just like with all other parse failures (which should fail in ANSI mode -- I hope they already do). FWIW, going back to the first version is not a shame now that we know it's the right thing to do. :) Maybe we can leave some comments in the code to explain the reasoning why we don't error out if things are missing.

**Author:** unfortunately, datetime functions don't respect the ANSI flag at all. Maybe we should keep everything the same as Spark 2.4 (return null when parsing Feb 29, because 1970-02-29 is invalid), and implement the ANSI behavior in Spark 3.1?

**Reviewer:** Yikes! It seems terrible that we have to do that later, because then it can break existing workloads. Launching the ANSI behavior was our chance to do the right thing here. If we can still fix this in 3.0 then that would of course be preferable, but that seems like a stretch...
```diff
+    val day = getOrDefault(accessor, ChronoField.DAY_OF_MONTH, 1)
+    LocalDate.of(year, month, day)
+  }
```
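To make the default-year behavior concrete, here is a hedged sketch of what `toLocalDate` does for a pattern without a year; the pattern and input are invented for illustration.

```scala
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoField

// With pattern "MM-dd" the accessor carries month and day but no year, so
// toLocalDate falls back to the default year 1970.
val parsed = DateTimeFormatter.ofPattern("MM-dd").parse("02-28")
val month = parsed.get(ChronoField.MONTH_OF_YEAR) // 2
val day = parsed.get(ChronoField.DAY_OF_MONTH)    // 28
LocalDate.of(1970, month, day)                    // 1970-02-28

// The Feb 29 case from the thread: "02-29" parses field-by-field, but 1970 is
// not a leap year, so constructing the date throws a DateTimeException.
// LocalDate.of(1970, 2, 29)
```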
```diff
+  private def toLocalTime(accessor: TemporalAccessor): LocalTime = {
+    val localTime = accessor.query(TemporalQueries.localTime())
+    // If all the time fields are specified, return the local time directly.
+    if (localTime != null) return localTime
+
+    val hour = if (accessor.isSupported(ChronoField.HOUR_OF_DAY)) {
+      accessor.get(ChronoField.HOUR_OF_DAY)
+    } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
+      // When we reach here, it means am/pm is not specified. Here we assume it's am.
+      accessor.get(ChronoField.HOUR_OF_AMPM)
+    } else {
+      0
+    }
+    val minute = getOrDefault(accessor, ChronoField.MINUTE_OF_HOUR, 0)
+    val second = getOrDefault(accessor, ChronoField.SECOND_OF_MINUTE, 0)
+    val nanoSecond = getOrDefault(accessor, ChronoField.NANO_OF_SECOND, 0)
+    LocalTime.of(hour, minute, second, nanoSecond)
+  }
```
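The `HOUR_OF_AMPM` branch above is easiest to see with a 12-hour pattern that carries no am/pm marker (an assumed example, not from the PR):

```scala
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoField

// Pattern "h:mm" uses a 12-hour clock with no am/pm field, so the parsed
// accessor supports HOUR_OF_AMPM but not HOUR_OF_DAY; toLocalTime then
// assumes "am" and reads the am/pm hour directly.
val parsed = DateTimeFormatter.ofPattern("h:mm").parse("3:15")
parsed.isSupported(ChronoField.HOUR_OF_DAY)  // false
parsed.isSupported(ChronoField.HOUR_OF_AMPM) // true
parsed.get(ChronoField.HOUR_OF_AMPM)         // 3
```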
```diff
-  // Converts the parsed temporal object to ZonedDateTime. It sets time components to zeros
-  // if they does not exist in the parsed object.
-  protected def toZonedDateTime(
-      temporalAccessor: TemporalAccessor,
-      zoneId: ZoneId): ZonedDateTime = {
-    // Parsed input might not have time related part. In that case, time component is set to zeros.
-    val parsedLocalTime = temporalAccessor.query(TemporalQueries.localTime)
-    val localTime = if (parsedLocalTime == null) LocalTime.MIDNIGHT else parsedLocalTime
-    // Parsed input must have date component. At least, year must present in temporalAccessor.
-    val localDate = temporalAccessor.query(TemporalQueries.localDate)
-
+  protected def toZonedDateTime(accessor: TemporalAccessor, zoneId: ZoneId): ZonedDateTime = {
+    val localDate = toLocalDate(accessor)
+    val localTime = toLocalTime(accessor)
     ZonedDateTime.of(localDate, localTime, zoneId)
   }
```
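As a usage sketch (assumed to run inside the trait, since `toZonedDateTime` is protected; the pattern and input are invented), a time-only pattern now yields a complete `ZonedDateTime` with the date half defaulted to 1970-01-01:

```scala
import java.time.ZoneId
import java.time.format.DateTimeFormatter

// The accessor carries only time fields; toLocalDate supplies 1970-01-01
// and toLocalTime supplies the parsed time, so the result is complete.
val accessor = DateTimeFormatter.ofPattern("HH:mm:ss").parse("12:30:00")
// toZonedDateTime(accessor, ZoneId.of("UTC")) == 1970-01-01T12:30 in UTC
```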
|
|
```diff
@@ -72,19 +107,15 @@ trait DateTimeFormatterHelper {
   // DateTimeParseException will address by the caller side.
   protected def checkDiffResult[T](
       s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = {
-    case e: DateTimeParseException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION =>
-      val res = try {
-        Some(legacyParseFunc(s))
+    case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION =>
```

**Author:** when the field value exceeds the valid range, the JDK throws `DateTimeException`, not `DateTimeParseException`.

```diff
+      try {
+        legacyParseFunc(s)
       } catch {
-        case _: Throwable => None
-      }
-      if (res.nonEmpty) {
-        throw new SparkUpgradeException("3.0", s"Fail to parse '$s' in the new parser. You can " +
-          s"set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore the behavior " +
-          s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e)
-      } else {
-        throw e
+        case _: Throwable => throw e
       }
+      throw new SparkUpgradeException("3.0", s"Fail to parse '$s' in the new parser. You can " +
+        s"set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore the behavior " +
+        s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e)
   }
 }
```
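For context on the exception change (a hedged sketch, not code from the PR): `DateTimeParseException` extends `DateTimeException`, so the widened `case e: DateTimeException` still catches parse failures, and additionally catches the range failures raised when the defaulted fields form an invalid date.

```scala
import java.time.{DateTimeException, LocalDate}

// LocalDate.of throws the base DateTimeException (not DateTimeParseException)
// when a field value is out of range for the resolved date.
try {
  LocalDate.of(1970, 2, 29) // 1970 is not a leap year
} catch {
  case e: DateTimeException => println(e.getMessage) // "Invalid date ..."
}
```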
|
|
```diff
@@ -101,10 +132,6 @@ private object DateTimeFormatterHelper {

   def toFormatter(builder: DateTimeFormatterBuilder, locale: Locale): DateTimeFormatter = {
     builder
-      .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
-      .parseDefaulting(ChronoField.DAY_OF_MONTH, 1)
-      .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0)
-      .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0)
```

**Author:** It's too fragile to use these default values, as it may fail to parse due to conflicts. This PR decides default values manually, after parsing. See https://github.com/apache/spark/pull/28576/files#diff-3e4b85f54d2e75f1b5b845279749024eR42

```diff
       .toFormatter(locale)
       .withChronology(IsoChronology.INSTANCE)
       .withResolverStyle(ResolverStyle.STRICT)
```
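The kind of conflict the author refers to can be reproduced with an assumed day-of-year pattern (an illustration, not the PR's own test case): the injected month/day defaults contradict the date derived from `DAY_OF_YEAR`, so strict resolution rejects otherwise valid input.

```scala
import java.time.chrono.IsoChronology
import java.time.format.{DateTimeFormatterBuilder, ResolverStyle}
import java.time.temporal.ChronoField

// Assumed example: "2020-050" is day 50 of 2020 (Feb 19), but the defaults
// claim month = 1 and day = 1, and the strict resolver cross-checks them.
val formatter = new DateTimeFormatterBuilder()
  .appendPattern("uuuu-DDD")
  .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
  .parseDefaulting(ChronoField.DAY_OF_MONTH, 1)
  .toFormatter()
  .withChronology(IsoChronology.INSTANCE)
  .withResolverStyle(ResolverStyle.STRICT)

// formatter.parse("2020-050") // throws: conflict between the derived date
//                             // and the defaulted month/day fields
```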
**Reviewer:** nit: I would follow the naming of `getOrElse` though ..