diff --git a/common/tags/src/test/java/org/apache/spark/tags/ChromeUITest.java b/common/tags/src/test/java/org/apache/spark/tags/ChromeUITest.java new file mode 100644 index 000000000000..e3fed3d656d2 --- /dev/null +++ b/common/tags/src/test/java/org/apache/spark/tags/ChromeUITest.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.tags; + +import java.lang.annotation.*; + +import org.scalatest.TagAnnotation; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface ChromeUITest { } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala index 0a1f33395ad6..b1adc3c112ed 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala @@ -122,10 +122,12 @@ private class HistoryServerDiskManager( * being used so that it's not evicted when running out of designated space. 
*/ def openStore(appId: String, attemptId: Option[String]): Option[File] = { + var newSize: Long = 0 val storePath = active.synchronized { val path = appStorePath(appId, attemptId) if (path.isDirectory()) { - active(appId -> attemptId) = sizeOf(path) + newSize = sizeOf(path) + active(appId -> attemptId) = newSize Some(path) } else { None @@ -133,7 +135,7 @@ private class HistoryServerDiskManager( } storePath.foreach { path => - updateAccessTime(appId, attemptId) + updateApplicationStoreInfo(appId, attemptId, newSize) } storePath @@ -238,10 +240,11 @@ private class HistoryServerDiskManager( new File(appStoreDir, fileName) } - private def updateAccessTime(appId: String, attemptId: Option[String]): Unit = { + private def updateApplicationStoreInfo( + appId: String, attemptId: Option[String], newSize: Long): Unit = { val path = appStorePath(appId, attemptId) - val info = ApplicationStoreInfo(path.getAbsolutePath(), clock.getTimeMillis(), appId, attemptId, - sizeOf(path)) + val info = ApplicationStoreInfo(path.getAbsolutePath(), clock.getTimeMillis(), appId, + attemptId, newSize) listing.write(info) } @@ -297,7 +300,7 @@ private class HistoryServerDiskManager( s"exceeded ($current > $max)") } - updateAccessTime(appId, attemptId) + updateApplicationStoreInfo(appId, attemptId, newSize) active.synchronized { active(appId -> attemptId) = newSize diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index b5614b263ca9..6191e41b4118 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -69,12 +69,12 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with // Pass partitionId message in val message: String = context.partitionId().toString val messages: Array[String] = context.allGather(message) - messages.toList.iterator + Iterator.single(messages.toList) } - // Take a sorted list of all the partitionId messages - val messages = rdd2.collect().head - // All the task partitionIds are shared - for((x, i) <- messages.view.zipWithIndex) assert(x.toString == i.toString) + val messages = rdd2.collect() + // All the task partitionIds are shared across all tasks + assert(messages.length === 4) + assert(messages.forall(_ == List("0", "1", "2", "3"))) } test("throw exception if we attempt to synchronize with different blocking calls") { diff --git a/core/src/test/scala/org/apache/spark/ui/ChromeUISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/ChromeUISeleniumSuite.scala new file mode 100644 index 000000000000..9ba705c4abd7 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ui/ChromeUISeleniumSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui + +import org.openqa.selenium.WebDriver +import org.openqa.selenium.chrome.{ChromeDriver, ChromeOptions} + +import org.apache.spark.tags.ChromeUITest + +/** + * Selenium tests for the Spark Web UI with Chrome. + */ +@ChromeUITest +class ChromeUISeleniumSuite extends RealBrowserUISeleniumSuite("webdriver.chrome.driver") { + + override var webDriver: WebDriver = _ + + override def beforeAll(): Unit = { + super.beforeAll() + val chromeOptions = new ChromeOptions + chromeOptions.addArguments("--headless", "--disable-gpu") + webDriver = new ChromeDriver(chromeOptions) + } + + override def afterAll(): Unit = { + try { + if (webDriver != null) { + webDriver.quit() + } + } finally { + super.afterAll() + } + } +} diff --git a/core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala new file mode 100644 index 000000000000..6793540c7e43 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui + +import org.openqa.selenium.{By, WebDriver} +import org.scalatest._ +import org.scalatest.concurrent.Eventually._ +import org.scalatest.time.SpanSugar._ +import org.scalatestplus.selenium.WebBrowser + +import org.apache.spark._ +import org.apache.spark.LocalSparkContext.withSpark +import org.apache.spark.internal.config.MEMORY_OFFHEAP_SIZE +import org.apache.spark.internal.config.UI.{UI_ENABLED, UI_KILL_ENABLED, UI_PORT} +import org.apache.spark.util.CallSite + +/** + * Selenium tests for the Spark Web UI with real web browsers. + */ +abstract class RealBrowserUISeleniumSuite(val driverProp: String) + extends SparkFunSuite with WebBrowser with Matchers with BeforeAndAfterAll { + + implicit var webDriver: WebDriver + private val driverPropPrefix = "spark.test." 
+ + override def beforeAll() { + super.beforeAll() + assume( + sys.props(driverPropPrefix + driverProp) !== null, + "System property " + driverPropPrefix + driverProp + + " should be set to the corresponding driver path.") + sys.props(driverProp) = sys.props(driverPropPrefix + driverProp) + } + + override def afterAll(): Unit = { + sys.props.remove(driverProp) + super.afterAll() + } + + test("SPARK-31534: text for tooltip should be escaped") { + withSpark(newSparkContext()) { sc => + sc.setLocalProperty(CallSite.LONG_FORM, "collect at :25") + sc.setLocalProperty(CallSite.SHORT_FORM, "collect at :25") + sc.parallelize(1 to 10).collect + + eventually(timeout(10.seconds), interval(50.milliseconds)) { + goToUi(sc, "/jobs") + + val jobDesc = + webDriver.findElement(By.cssSelector("div[class='application-timeline-content']")) + jobDesc.getAttribute("data-title") should include ("collect at <console>:25") + + goToUi(sc, "/jobs/job/?id=0") + webDriver.get(sc.ui.get.webUrl.stripSuffix("/") + "/jobs/job/?id=0") + val stageDesc = webDriver.findElement(By.cssSelector("div[class='job-timeline-content']")) + stageDesc.getAttribute("data-title") should include ("collect at <console>:25") + + // Open DAG Viz. + webDriver.findElement(By.id("job-dag-viz")).click() + val nodeDesc = webDriver.findElement(By.cssSelector("g[class='node_0 node']")) + nodeDesc.getAttribute("name") should include ("collect at <console>:25") + } + } + } + + /** + * Create a test SparkContext with the SparkUI enabled. + * It is safe to `get` the SparkUI directly from the SparkContext returned here. + */ + private def newSparkContext( + killEnabled: Boolean = true, + master: String = "local", + additionalConfs: Map[String, String] = Map.empty): SparkContext = { + val conf = new SparkConf() + .setMaster(master) + .setAppName("test") + .set(UI_ENABLED, true) + .set(UI_PORT, 0) + .set(UI_KILL_ENABLED, killEnabled) + .set(MEMORY_OFFHEAP_SIZE.key, "64m") + additionalConfs.foreach { case (k, v) => conf.set(k, v) } + val sc = new SparkContext(conf) + assert(sc.ui.isDefined) + sc + } + + def goToUi(sc: SparkContext, path: String): Unit = { + goToUi(sc.ui.get, path) + } + + def goToUi(ui: SparkUI, path: String): Unit = { + go to (ui.webUrl.stripSuffix("/") + path) + } +} diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index 3ec938511640..909056eab8c5 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -773,33 +773,6 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B } } - test("SPARK-31534: text for tooltip should be escaped") { - withSpark(newSparkContext()) { sc => - sc.setLocalProperty(CallSite.LONG_FORM, "collect at :25") - sc.setLocalProperty(CallSite.SHORT_FORM, "collect at :25") - sc.parallelize(1 to 10).collect - - val driver = webDriver.asInstanceOf[HtmlUnitDriver] - driver.setJavascriptEnabled(true) - - eventually(timeout(10.seconds), interval(50.milliseconds)) { - goToUi(sc, "/jobs") - val jobDesc = - driver.findElement(By.cssSelector("div[class='application-timeline-content']")) - jobDesc.getAttribute("data-title") should include ("collect at <console>:25") - - goToUi(sc, "/jobs/job/?id=0") - val stageDesc = driver.findElement(By.cssSelector("div[class='job-timeline-content']")) - stageDesc.getAttribute("data-title") should include ("collect at <console>:25") - - // Open DAG Viz. 
- driver.findElement(By.id("job-dag-viz")).click() - val nodeDesc = driver.findElement(By.cssSelector("g[class='node_0 node']")) - nodeDesc.getAttribute("name") should include ("collect at <console>:25") - } - } - } - def goToUi(sc: SparkContext, path: String): Unit = { goToUi(sc.ui.get, path) } diff --git a/dev/run-tests.py b/dev/run-tests.py index 5255a77ec208..5f9cd6beb332 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -33,6 +33,9 @@ from sparktestsupport.toposort import toposort_flatten import sparktestsupport.modules as modules +always_excluded_tags = [ + "org.apache.spark.tags.ChromeUITest" +] # ------------------------------------------------------------------------------------------------- # Functions for traversing module dependency graph @@ -606,6 +609,8 @@ def main(): print("[info] Found the following changed modules:", ", ".join(x.name for x in changed_modules)) + excluded_tags.extend(always_excluded_tags) + # setup environment variables # note - the 'root' module doesn't collect environment variables for all modules. Because the # environment variables should not be set if a module is not changed, even if running the 'root' diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 96f2c5dcf973..2272c9038484 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -961,3 +961,4 @@ Below are the scenarios in which Hive and Spark generate different results: * `SQRT(n)` If n < 0, Hive returns null, Spark SQL returns NaN. * `ACOS(n)` If n < -1 or n > 1, Hive returns null, Spark SQL returns NaN. * `ASIN(n)` If n < -1 or n > 1, Hive returns null, Spark SQL returns NaN. +* `CAST(n AS TIMESTAMP)` If n is integral numbers, Hive treats n as milliseconds, Spark SQL treats n as seconds. diff --git a/pom.xml b/pom.xml index fd4cebcd3731..6620673a7e5f 100644 --- a/pom.xml +++ b/pom.xml @@ -243,6 +243,7 @@ things breaking. 
--> ${session.executionRootDirectory} + 1g @@ -2512,6 +2513,7 @@ false false true + ${spark.test.webdriver.chrome.driver} __not_used__ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 26d7b3ef3dcd..5e53927885ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -424,6 +424,9 @@ object FunctionRegistry { expression[MakeInterval]("make_interval"), expression[DatePart]("date_part"), expression[Extract]("extract"), + expression[SecondsToTimestamp]("timestamp_seconds"), + expression[MillisToTimestamp]("timestamp_millis"), + expression[MicrosToTimestamp]("timestamp_micros"), // collection functions expression[CreateArray]("array"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index ccedcb41fc28..afc57aa546fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -401,6 +401,83 @@ case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCas } } +abstract class NumberToTimestampBase extends UnaryExpression + with ExpectsInputTypes { + + protected def upScaleFactor: Long + + override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType) + + override def dataType: DataType = TimestampType + + override def nullSafeEval(input: Any): Any = { + Math.multiplyExact(input.asInstanceOf[Number].longValue(), upScaleFactor) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + if (upScaleFactor == 1) { + defineCodeGen(ctx, ev, c => c) + } else { + defineCodeGen(ctx, ev, c => s"java.lang.Math.multiplyExact($c, ${upScaleFactor}L)") + } + } +} + +@ExpressionDescription( + usage = "_FUNC_(seconds) - Creates timestamp from the number of seconds since UTC epoch.", + examples = """ + Examples: + > SELECT _FUNC_(1230219000); + 2008-12-25 07:30:00 + """, + group = "datetime_funcs", + since = "3.1.0") +case class SecondsToTimestamp(child: Expression) + extends NumberToTimestampBase { + + override def upScaleFactor: Long = MICROS_PER_SECOND + + override def prettyName: String = "timestamp_seconds" +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(milliseconds) - Creates timestamp from the number of milliseconds since UTC epoch.", + examples = """ + Examples: + > SELECT _FUNC_(1230219000123); + 2008-12-25 07:30:00.123 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class MillisToTimestamp(child: Expression) + extends NumberToTimestampBase { + + override def upScaleFactor: Long = MICROS_PER_MILLIS + + override def prettyName: String = "timestamp_millis" +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(microseconds) - Creates timestamp from the number of microseconds since UTC epoch.", + examples = """ + Examples: + > SELECT _FUNC_(1230219000123123); + 2008-12-25 07:30:00.123123 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class MicrosToTimestamp(child: Expression) + extends 
NumberToTimestampBase { + + override def upScaleFactor: Long = 1L + + override def prettyName: String = "timestamp_micros" +} + @ExpressionDescription( usage = "_FUNC_(date) - Returns the year component of the date/timestamp.", examples = """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index 7d9495509619..8261f57916fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -53,7 +53,7 @@ class Iso8601DateFormatter( val specialDate = convertSpecialDate(s.trim, zoneId) specialDate.getOrElse { try { - val localDate = LocalDate.parse(s, formatter) + val localDate = toLocalDate(formatter.parse(s)) localDateToDays(localDate) } catch checkDiffResult(s, legacyFormatter.parse) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 05ec23f7ad47..35f95dbffca6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util import java.time._ import java.time.chrono.IsoChronology -import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, DateTimeParseException, ResolverStyle} +import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle} import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} import java.util.Locale @@ -31,17 +31,52 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._ trait DateTimeFormatterHelper { + private def getOrDefault(accessor: TemporalAccessor, field: ChronoField, default: Int): Int = { + if (accessor.isSupported(field)) { + accessor.get(field) + } else { + default + } + } + + protected def toLocalDate(accessor: TemporalAccessor): LocalDate = { + val localDate = accessor.query(TemporalQueries.localDate()) + // If all the date fields are specified, return the local date directly. + if (localDate != null) return localDate + + // Users may want to parse only a few datetime fields from a string and extract these fields + // later, and we should provide default values for missing fields. + // To be compatible with Spark 2.4, we pick 1970 as the default value of year. + val year = getOrDefault(accessor, ChronoField.YEAR, 1970) + val month = getOrDefault(accessor, ChronoField.MONTH_OF_YEAR, 1) + val day = getOrDefault(accessor, ChronoField.DAY_OF_MONTH, 1) + LocalDate.of(year, month, day) + } + + private def toLocalTime(accessor: TemporalAccessor): LocalTime = { + val localTime = accessor.query(TemporalQueries.localTime()) + // If all the time fields are specified, return the local time directly. + if (localTime != null) return localTime + + val hour = if (accessor.isSupported(ChronoField.HOUR_OF_DAY)) { + accessor.get(ChronoField.HOUR_OF_DAY) + } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) { + // When we reach here, it means am/pm is not specified. Here we assume it's am. 
+ accessor.get(ChronoField.HOUR_OF_AMPM) + } else { + 0 + } + val minute = getOrDefault(accessor, ChronoField.MINUTE_OF_HOUR, 0) + val second = getOrDefault(accessor, ChronoField.SECOND_OF_MINUTE, 0) + val nanoSecond = getOrDefault(accessor, ChronoField.NANO_OF_SECOND, 0) + LocalTime.of(hour, minute, second, nanoSecond) + } + // Converts the parsed temporal object to ZonedDateTime. It sets time components to zeros // if they does not exist in the parsed object. - protected def toZonedDateTime( - temporalAccessor: TemporalAccessor, - zoneId: ZoneId): ZonedDateTime = { - // Parsed input might not have time related part. In that case, time component is set to zeros. - val parsedLocalTime = temporalAccessor.query(TemporalQueries.localTime) - val localTime = if (parsedLocalTime == null) LocalTime.MIDNIGHT else parsedLocalTime - // Parsed input must have date component. At least, year must present in temporalAccessor. - val localDate = temporalAccessor.query(TemporalQueries.localDate) - + protected def toZonedDateTime(accessor: TemporalAccessor, zoneId: ZoneId): ZonedDateTime = { + val localDate = toLocalDate(accessor) + val localTime = toLocalTime(accessor) ZonedDateTime.of(localDate, localTime, zoneId) } @@ -72,19 +107,15 @@ trait DateTimeFormatterHelper { // DateTimeParseException will address by the caller side. protected def checkDiffResult[T]( s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = { - case e: DateTimeParseException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => - val res = try { - Some(legacyParseFunc(s)) + case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => + try { + legacyParseFunc(s) } catch { - case _: Throwable => None - } - if (res.nonEmpty) { - throw new SparkUpgradeException("3.0", s"Fail to parse '$s' in the new parser. You can " + - s"set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore the behavior " + - s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e) - } else { - throw e + case _: Throwable => throw e } + throw new SparkUpgradeException("3.0", s"Fail to parse '$s' in the new parser. 
You can " + + s"set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore the behavior " + + s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e) } } @@ -101,10 +132,6 @@ private object DateTimeFormatterHelper { def toFormatter(builder: DateTimeFormatterBuilder, locale: Locale): DateTimeFormatter = { builder - .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) - .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) - .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) - .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) .toFormatter(locale) .withChronology(IsoChronology.INSTANCE) .withResolverStyle(ResolverStyle.STRICT) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala index 0e9fcc980aab..822008007ebb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala @@ -325,7 +325,7 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { assert(parser.makeConverter("t", TimestampType).apply("2020-1-12 12:3:45") == date(2020, 1, 12, 12, 3, 45, 0)) assert(parser.makeConverter("t", DateType).apply("2020-1-12") == - days(2020, 1, 12, 0, 0, 0)) + days(2020, 1, 12)) // The legacy format allows arbitrary length of second fraction. assert(parser.makeConverter("t", TimestampType).apply("2020-1-12 12:3:45.1") == date(2020, 1, 12, 12, 3, 45, 100000)) @@ -333,22 +333,22 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { date(2020, 1, 12, 12, 3, 45, 123400)) // The legacy format allow date string to end with T or space, with arbitrary string assert(parser.makeConverter("t", DateType).apply("2020-1-12T") == - days(2020, 1, 12, 0, 0, 0)) + days(2020, 1, 12)) assert(parser.makeConverter("t", DateType).apply("2020-1-12Txyz") == - days(2020, 1, 12, 0, 0, 0)) + days(2020, 1, 12)) assert(parser.makeConverter("t", DateType).apply("2020-1-12 ") == - days(2020, 1, 12, 0, 0, 0)) + days(2020, 1, 12)) assert(parser.makeConverter("t", DateType).apply("2020-1-12 xyz") == - days(2020, 1, 12, 0, 0, 0)) + days(2020, 1, 12)) // The legacy format ignores the "GMT" from the string assert(parser.makeConverter("t", TimestampType).apply("2020-1-12 12:3:45GMT") == date(2020, 1, 12, 12, 3, 45, 0)) assert(parser.makeConverter("t", TimestampType).apply("GMT2020-1-12 12:3:45") == date(2020, 1, 12, 12, 3, 45, 0)) assert(parser.makeConverter("t", DateType).apply("2020-1-12GMT") == - days(2020, 1, 12, 0, 0, 0)) + days(2020, 1, 12)) assert(parser.makeConverter("t", DateType).apply("GMT2020-1-12") == - days(2020, 1, 12, 0, 0, 0)) + days(2020, 1, 12)) } val options = new CSVOptions(Map.empty[String, String], false, "UTC") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 6e8397d12da7..87062f2d4ef3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1146,4 +1146,26 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Literal("yyyy-MM-dd'T'HH:mm:ss.SSSz")), "Fail to parse") } } + + test("SPARK-31710:Adds TIMESTAMP_SECONDS, " + + "TIMESTAMP_MILLIS and 
TIMESTAMP_MICROS functions") { + checkEvaluation(SecondsToTimestamp(Literal(1230219000)), 1230219000L * MICROS_PER_SECOND) + checkEvaluation(SecondsToTimestamp(Literal(-1230219000)), -1230219000L * MICROS_PER_SECOND) + checkEvaluation(SecondsToTimestamp(Literal(null, IntegerType)), null) + checkEvaluation(MillisToTimestamp(Literal(1230219000123L)), 1230219000123L * MICROS_PER_MILLIS) + checkEvaluation(MillisToTimestamp( + Literal(-1230219000123L)), -1230219000123L * MICROS_PER_MILLIS) + checkEvaluation(MillisToTimestamp(Literal(null, IntegerType)), null) + checkEvaluation(MicrosToTimestamp(Literal(1230219000123123L)), 1230219000123123L) + checkEvaluation(MicrosToTimestamp(Literal(-1230219000123123L)), -1230219000123123L) + checkEvaluation(MicrosToTimestamp(Literal(null, IntegerType)), null) + checkExceptionInExpression[ArithmeticException]( + SecondsToTimestamp(Literal(1230219000123123L)), "long overflow") + checkExceptionInExpression[ArithmeticException]( + SecondsToTimestamp(Literal(-1230219000123123L)), "long overflow") + checkExceptionInExpression[ArithmeticException]( + MillisToTimestamp(Literal(92233720368547758L)), "long overflow") + checkExceptionInExpression[ArithmeticException]( + MillisToTimestamp(Literal(-92233720368547758L)), "long overflow") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala index bf9e8f71ba1c..66aef1b4b6cb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -88,12 +88,8 @@ object DateTimeTestUtils { def days( year: Int, month: Byte = 1, - day: Byte = 1, - hour: Byte = 0, - minute: Byte = 0, - sec: Byte = 0): Int = { - val micros = date(year, month, day, hour, minute, sec) - TimeUnit.MICROSECONDS.toDays(micros).toInt + day: Byte = 1): Int = { + LocalDate.of(year, month, day).toEpochDay.toInt } // Returns microseconds since epoch for current date and give time diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 807ec7dafb56..4883bef8c088 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -386,13 +386,13 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { } test("date add months") { - val input = days(1997, 2, 28, 10, 30) + val input = days(1997, 2, 28) assert(dateAddMonths(input, 36) === days(2000, 2, 28)) assert(dateAddMonths(input, -13) === days(1996, 1, 28)) } test("date add interval with day precision") { - val input = days(1997, 2, 28, 10, 30) + val input = days(1997, 2, 28) assert(dateAddInterval(input, new CalendarInterval(36, 0, 0)) === days(2000, 2, 28)) assert(dateAddInterval(input, new CalendarInterval(36, 47, 0)) === days(2000, 4, 15)) assert(dateAddInterval(input, new CalendarInterval(-13, 0, 0)) === days(1996, 1, 28)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala index 3954b9b8355c..7d503cc09117 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala @@ -17,18 +17,19 @@ package org.apache.spark.sql.util -import java.time.{DateTimeException, LocalDate, ZoneOffset} +import java.time.{DateTimeException, LocalDate} import org.apache.spark.{SparkFunSuite, SparkUpgradeException} import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.catalyst.util.{DateFormatter, LegacyDateFormats} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy class DateFormatterSuite extends SparkFunSuite with SQLHelper { test("parsing dates") { - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + outstandingTimezonesIds.foreach { timeZone => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { val formatter = DateFormatter(getZoneId(timeZone)) val daysSinceEpoch = formatter.parse("2018-12-02") @@ -38,7 +39,7 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { } test("format dates") { - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + outstandingTimezonesIds.foreach { timeZone => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { val formatter = DateFormatter(getZoneId(timeZone)) val (days, expected) = (17867, "2018-12-02") @@ -65,7 +66,7 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { "2018-12-12", "2038-01-01", "5010-11-17").foreach { date => - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + outstandingTimezonesIds.foreach { timeZone => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { val formatter = DateFormatter( DateFormatter.defaultPattern, @@ -99,7 +100,7 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { 17877, 24837, 1110657).foreach { days => - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + outstandingTimezonesIds.foreach { timeZone => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { val formatter = DateFormatter( DateFormatter.defaultPattern, @@ -118,14 +119,14 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { } test("parsing date without explicit day") { - val formatter = DateFormatter("yyyy MMM", ZoneOffset.UTC) + val formatter = DateFormatter("yyyy MMM", UTC) val daysSinceEpoch = formatter.parse("2018 Dec") - assert(daysSinceEpoch === LocalDate.of(2018, 12, 1).toEpochDay) + assert(daysSinceEpoch === days(2018, 12, 1)) } test("formatting negative years with default pattern") { - val epochDays = LocalDate.of(-99, 1, 1).toEpochDay.toInt - assert(DateFormatter(ZoneOffset.UTC).format(epochDays) === "-0099-01-01") + val epochDays = days(-99, 1, 1) + assert(DateFormatter(UTC).format(epochDays) === "-0099-01-01") } test("special date values") { @@ -142,8 +143,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { } test("SPARK-30958: parse date with negative year") { - val formatter1 = DateFormatter("yyyy-MM-dd", ZoneOffset.UTC) - assert(formatter1.parse("-1234-02-22") === localDateToDays(LocalDate.of(-1234, 2, 22))) + val formatter1 = DateFormatter("yyyy-MM-dd", UTC) + assert(formatter1.parse("-1234-02-22") === days(-1234, 2, 22)) def assertParsingError(f: => Unit): Unit = { intercept[Exception](f) match { @@ -155,18 +156,18 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { } // "yyyy" with "G" can't parse negative year or 
year 0000. - val formatter2 = DateFormatter("G yyyy-MM-dd", ZoneOffset.UTC) + val formatter2 = DateFormatter("G yyyy-MM-dd", UTC) assertParsingError(formatter2.parse("BC -1234-02-22")) assertParsingError(formatter2.parse("AD 0000-02-22")) - assert(formatter2.parse("BC 1234-02-22") === localDateToDays(LocalDate.of(-1233, 2, 22))) - assert(formatter2.parse("AD 1234-02-22") === localDateToDays(LocalDate.of(1234, 2, 22))) + assert(formatter2.parse("BC 1234-02-22") === days(-1233, 2, 22)) + assert(formatter2.parse("AD 1234-02-22") === days(1234, 2, 22)) } test("SPARK-31557: rebasing in legacy formatters/parsers") { withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> LegacyBehaviorPolicy.LEGACY.toString) { LegacyDateFormats.values.foreach { legacyFormat => - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + outstandingTimezonesIds.foreach { timeZone => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { val formatter = DateFormatter( DateFormatter.defaultPattern, @@ -182,4 +183,17 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { } } } + + test("missing date fields") { + val formatter = DateFormatter("HH", UTC) + val daysSinceEpoch = formatter.parse("20") + assert(daysSinceEpoch === days(1970, 1, 1)) + } + + test("missing year field with invalid date") { + val formatter = DateFormatter("MM-dd", UTC) + // The date parser in 2.4 accepts 1970-02-29 and turn it into 1970-03-01, so we should get a + // SparkUpgradeException here. + intercept[SparkUpgradeException](formatter.parse("02-29")) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index b467e24b5301..dccb3defe372 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -17,15 +17,15 @@ package org.apache.spark.sql.util -import java.time.{DateTimeException, Instant, LocalDateTime, LocalTime, ZoneOffset} +import java.time.{DateTimeException, Instant, LocalDateTime, LocalTime} import java.util.concurrent.TimeUnit import org.scalatest.Matchers import org.apache.spark.{SparkFunSuite, SparkUpgradeException} import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, LegacyDateFormats, TimestampFormatter} -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{CET, PST, UTC} +import org.apache.spark.sql.catalyst.util.{LegacyDateFormats, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy @@ -44,10 +44,10 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers "Antarctica/Vostok" -> 1543723872001234L, "Asia/Hong_Kong" -> 1543716672001234L, "Europe/Amsterdam" -> 1543741872001234L) - DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId => + outstandingTimezonesIds.foreach { zoneId => val formatter = TimestampFormatter( "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", - DateTimeUtils.getZoneId(zoneId), + getZoneId(zoneId), needVarLengthSecondFraction = true) val microsSinceEpoch = formatter.parse(localDate) assert(microsSinceEpoch === expectedMicros(zoneId)) @@ -65,7 +65,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers 
"Antarctica/Vostok" -> "2018-12-02 16:11:12.001234", "Asia/Hong_Kong" -> "2018-12-02 18:11:12.001234", "Europe/Amsterdam" -> "2018-12-02 11:11:12.001234") - DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId => + outstandingTimezonesIds.foreach { zoneId => Seq( TimestampFormatter( "yyyy-MM-dd HH:mm:ss.SSSSSS", @@ -95,7 +95,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers 1543749753123456L, 2177456523456789L, 11858049903010203L).foreach { micros => - DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + outstandingZoneIds.foreach { zoneId => val timestamp = TimestampFormatter(pattern, zoneId).format(micros) val parsed = TimestampFormatter( pattern, zoneId, needVarLengthSecondFraction = true).parse(timestamp) @@ -116,7 +116,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers "2018-12-02T11:22:33.123456", "2039-01-01T01:02:03.456789", "2345-10-07T22:45:03.010203").foreach { timestamp => - DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + outstandingZoneIds.foreach { zoneId => val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS" val micros = TimestampFormatter( pattern, zoneId, needVarLengthSecondFraction = true).parse(timestamp) @@ -127,10 +127,9 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } test("case insensitive parsing of am and pm") { - val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", ZoneOffset.UTC) + val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC) val micros = formatter.parse("2009 Mar 20 11:30:01 am") - assert(micros === TimeUnit.SECONDS.toMicros( - LocalDateTime.of(2009, 3, 20, 11, 30, 1).toEpochSecond(ZoneOffset.UTC))) + assert(micros === date(2009, 3, 20, 11, 30, 1)) } test("format fraction of second") { @@ -143,7 +142,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers 1000000 -> "1970-01-01 00:00:01").foreach { case (micros, tsStr) => assert(formatter.format(micros) === tsStr) assert(formatter.format(microsToInstant(micros)) === tsStr) - DateTimeTestUtils.withDefaultTimeZone(UTC) { + withDefaultTimeZone(UTC) { assert(formatter.format(toJavaTimestamp(micros)) === tsStr) } } @@ -151,10 +150,10 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers test("formatting negative years with default pattern") { val instant = LocalDateTime.of(-99, 1, 1, 0, 0, 0).atZone(UTC).toInstant - val micros = DateTimeUtils.instantToMicros(instant) + val micros = instantToMicros(instant) assert(TimestampFormatter(UTC).format(micros) === "-0099-01-01 00:00:00") assert(TimestampFormatter(UTC).format(instant) === "-0099-01-01 00:00:00") - DateTimeTestUtils.withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone + withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC).format(toJavaTimestamp(micros)) === "0100-01-01 00:00:00 BC") } @@ -181,11 +180,10 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } test("parsing timestamp strings with various seconds fractions") { - DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + outstandingZoneIds.foreach { zoneId => def check(pattern: String, input: String, reference: String): Unit = { val formatter = TimestampFormatter(pattern, zoneId, needVarLengthSecondFraction = true) - val expected = DateTimeUtils.stringToTimestamp( - UTF8String.fromString(reference), zoneId).get + val expected = 
stringToTimestamp(UTF8String.fromString(reference), zoneId).get val actual = formatter.parse(input) assert(actual === expected) } @@ -219,11 +217,10 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } test("formatting timestamp strings up to microsecond precision") { - DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + outstandingZoneIds.foreach { zoneId => def check(pattern: String, input: String, expected: String): Unit = { val formatter = TimestampFormatter(pattern, zoneId) - val timestamp = DateTimeUtils.stringToTimestamp( - UTF8String.fromString(input), zoneId).get + val timestamp = stringToTimestamp(UTF8String.fromString(input), zoneId).get val actual = formatter.format(timestamp) assert(actual === expected) } @@ -259,9 +256,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } test("SPARK-30958: parse timestamp with negative year") { - val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", ZoneOffset.UTC, true) - assert(formatter1.parse("-1234-02-22 02:22:22") === instantToMicros( - LocalDateTime.of(-1234, 2, 22, 2, 22, 22).toInstant(ZoneOffset.UTC))) + val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, true) + assert(formatter1.parse("-1234-02-22 02:22:22") === date(-1234, 2, 22, 2, 22, 22)) def assertParsingError(f: => Unit): Unit = { intercept[Exception](f) match { @@ -277,17 +273,15 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers assertParsingError(formatter2.parse("BC -1234-02-22 02:22:22")) assertParsingError(formatter2.parse("AC 0000-02-22 02:22:22")) - assert(formatter2.parse("BC 1234-02-22 02:22:22") === instantToMicros( - LocalDateTime.of(-1233, 2, 22, 2, 22, 22).toInstant(ZoneOffset.UTC))) - assert(formatter2.parse("AD 1234-02-22 02:22:22") === instantToMicros( - LocalDateTime.of(1234, 2, 22, 2, 22, 22).toInstant(ZoneOffset.UTC))) + assert(formatter2.parse("BC 1234-02-22 02:22:22") === date(-1233, 2, 22, 2, 22, 22)) + assert(formatter2.parse("AD 1234-02-22 02:22:22") === date(1234, 2, 22, 2, 22, 22)) } test("SPARK-31557: rebasing in legacy formatters/parsers") { withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> LegacyBehaviorPolicy.LEGACY.toString) { - DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + outstandingZoneIds.foreach { zoneId => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> zoneId.getId) { - DateTimeTestUtils.withDefaultTimeZone(zoneId) { + withDefaultTimeZone(zoneId) { withClue(s"zoneId = ${zoneId.getId}") { val formatters = LegacyDateFormats.values.map { legacyFormat => TimestampFormatter( @@ -296,7 +290,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers TimestampFormatter.defaultLocale, legacyFormat, needVarLengthSecondFraction = false) - }.toSeq :+ TimestampFormatter.getFractionFormatter(zoneId) + }.toSeq :+ TimestampFormatter.getFractionFormatter(zoneId) formatters.foreach { formatter => assert(microsToInstant(formatter.parse("1000-01-01 01:02:03")) .atZone(zoneId) @@ -317,4 +311,89 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } } } + + test("parsing hour with various patterns") { + def createFormatter(pattern: String): TimestampFormatter = { + // Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid value range. 
+ TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false) + } + + withClue("HH") { + val formatter = createFormatter("yyyy-MM-dd HH") + + val micros1 = formatter.parse("2009-12-12 00") + assert(micros1 === date(2009, 12, 12)) + + val micros2 = formatter.parse("2009-12-12 15") + assert(micros2 === date(2009, 12, 12, 15)) + + intercept[DateTimeException](formatter.parse("2009-12-12 24")) + } + + withClue("kk") { + val formatter = createFormatter("yyyy-MM-dd kk") + + intercept[DateTimeException](formatter.parse("2009-12-12 00")) + + val micros1 = formatter.parse("2009-12-12 15") + assert(micros1 === date(2009, 12, 12, 15)) + + val micros2 = formatter.parse("2009-12-12 24") + assert(micros2 === date(2009, 12, 12)) + } + + withClue("KK") { + val formatter = createFormatter("yyyy-MM-dd KK a") + + val micros1 = formatter.parse("2009-12-12 00 am") + assert(micros1 === date(2009, 12, 12)) + + // For `KK`, "12:00:00 am" is the same as "00:00:00 pm". + val micros2 = formatter.parse("2009-12-12 12 am") + assert(micros2 === date(2009, 12, 12, 12)) + + val micros3 = formatter.parse("2009-12-12 00 pm") + assert(micros3 === date(2009, 12, 12, 12)) + + intercept[DateTimeException](formatter.parse("2009-12-12 12 pm")) + } + + withClue("hh") { + val formatter = createFormatter("yyyy-MM-dd hh a") + + intercept[DateTimeException](formatter.parse("2009-12-12 00 am")) + + val micros1 = formatter.parse("2009-12-12 12 am") + assert(micros1 === date(2009, 12, 12)) + + intercept[DateTimeException](formatter.parse("2009-12-12 00 pm")) + + val micros2 = formatter.parse("2009-12-12 12 pm") + assert(micros2 === date(2009, 12, 12, 12)) + } + } + + test("missing date fields") { + val formatter = TimestampFormatter("HH:mm:ss", UTC) + val micros = formatter.parse("11:30:01") + assert(micros === date(1970, 1, 1, 11, 30, 1)) + } + + test("missing year field with invalid date") { + // Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid date. 
+ val formatter = TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false) + withDefaultTimeZone(UTC)(intercept[DateTimeException](formatter.parse("02-29"))) + } + + test("missing am/pm field") { + val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC) + val micros = formatter.parse("2009 11:30:01") + assert(micros === date(2009, 1, 1, 11, 30, 1)) + } + + test("missing time fields") { + val formatter = TimestampFormatter("yyyy HH", UTC) + val micros = formatter.parse("2009 11") + assert(micros === date(2009, 1, 1, 11)) + } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 3570fb61e288..0efc2a6e0c02 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 333 + - Number of queries: 336 - Number of expressions that missing example: 34 - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions @@ -175,6 +175,8 @@ | org.apache.spark.sql.catalyst.expressions.MapValues | map_values | SELECT map_values(map(1, 'a', 2, 'b')) | struct> | | org.apache.spark.sql.catalyst.expressions.MapZipWith | map_zip_with | SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) | struct> | | org.apache.spark.sql.catalyst.expressions.Md5 | md5 | SELECT md5('Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp | timestamp_micros | SELECT timestamp_micros(1230219000123123) | struct | +| org.apache.spark.sql.catalyst.expressions.MillisToTimestamp | timestamp_millis | SELECT timestamp_millis(1230219000123) | struct | | org.apache.spark.sql.catalyst.expressions.Minute | minute | SELECT minute('2009-07-30 12:58:59') | struct | | org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct | @@ -223,6 +225,7 @@ | org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct | | org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct | | org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct | +| org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp | timestamp_seconds | SELECT timestamp_seconds(1230219000) | struct | | org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! 
Good morning.') | struct>> | | org.apache.spark.sql.catalyst.expressions.Sequence | sequence | SELECT sequence(1, 5) | struct> | | org.apache.spark.sql.catalyst.expressions.Sha1 | sha1 | SELECT sha1('Spark') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index fd3325085df9..0fb373f419e7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -1,5 +1,15 @@ -- date time functions +-- [SPARK-31710] TIMESTAMP_SECONDS, TIMESTAMP_MILLISECONDS and TIMESTAMP_MICROSECONDS to timestamp transfer +select TIMESTAMP_SECONDS(1230219000),TIMESTAMP_SECONDS(-1230219000),TIMESTAMP_SECONDS(null); +select TIMESTAMP_MILLIS(1230219000123),TIMESTAMP_MILLIS(-1230219000123),TIMESTAMP_MILLIS(null); +select TIMESTAMP_MICROS(1230219000123123),TIMESTAMP_MICROS(-1230219000123123),TIMESTAMP_MICROS(null); +-- overflow exception: +select TIMESTAMP_SECONDS(1230219000123123); +select TIMESTAMP_SECONDS(-1230219000123123); +select TIMESTAMP_MILLIS(92233720368547758); +select TIMESTAMP_MILLIS(-92233720368547758); + -- [SPARK-16836] current_date and current_timestamp literals select current_date = current_date(), current_timestamp = current_timestamp(); @@ -86,7 +96,7 @@ select date_sub('2011-11-11', str) from v; select null - date '2019-10-06'; select date '2001-10-01' - date '2001-09-28'; --- variable-length tests +-- variable-length second fraction tests select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); select to_timestamp('2019-10-06 10:11:12.0', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); select to_timestamp('2019-10-06 10:11:12.1', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); @@ -95,7 +105,7 @@ select to_timestamp('2019-10-06 10:11:12.123UTC', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zz select to_timestamp('2019-10-06 10:11:12.1234', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); select to_timestamp('2019-10-06 10:11:12.12345CST', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); select to_timestamp('2019-10-06 10:11:12.123456PST', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); --- exceeded max variable length +-- second fraction exceeded max variable length select to_timestamp('2019-10-06 10:11:12.1234567PST', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); -- special cases select to_timestamp('123456 2019-10-06 10:11:12.123456PST', 'SSSSSS yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); @@ -122,3 +132,11 @@ select to_timestamp("2019-10-06T10:11:12'12", "yyyy-MM-dd'T'HH:mm:ss''SSSS"); -- select to_timestamp("2019-10-06T10:11:12'", "yyyy-MM-dd'T'HH:mm:ss''"); -- tail select to_timestamp("'2019-10-06T10:11:12", "''yyyy-MM-dd'T'HH:mm:ss"); -- head select to_timestamp("P2019-10-06T10:11:12", "'P'yyyy-MM-dd'T'HH:mm:ss"); -- head but as single quote + +-- missing fields +select to_timestamp("16", "dd"); +select to_timestamp("02-29", "MM-dd"); +select to_date("16", "dd"); +select to_date("02-29", "MM-dd"); +select to_timestamp("2019 40", "yyyy mm"); +select to_timestamp("2019 10:10:10", "yyyy hh:mm:ss"); diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql index 131890fddb0d..f6fa44161a77 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql @@ -48,6 +48,21 @@ select from_json('[null, {"a":2}]', 'array>'); select from_json('[{"a": 1}, {"b":2}]', 'array>'); select from_json('[{"a": 1}, 2]', 'array>'); +-- from_json - datetime type 
+select from_json('{"d": "2012-12-15", "t": "2012-12-15 15:15:15"}', 'd date, t timestamp'); +select from_json( + '{"d": "12/15 2012", "t": "12/15 2012 15:15:15"}', + 'd date, t timestamp', + map('dateFormat', 'MM/dd yyyy', 'timestampFormat', 'MM/dd yyyy HH:mm:ss')); +select from_json( + '{"d": "02-29"}', + 'd date', + map('dateFormat', 'MM-dd')); +select from_json( + '{"t": "02-29"}', + 't timestamp', + map('timestampFormat', 'MM-dd')); + -- to_json - array type select to_json(array('1', '2', '3')); select to_json(array(array(1, 2, 3), array(4))); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index aad1e5f34387..2e61cb8cb8c3 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,65 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 85 +-- Number of queries: 92 + + +-- !query +select TIMESTAMP_SECONDS(1230219000),TIMESTAMP_SECONDS(-1230219000),TIMESTAMP_SECONDS(null) +-- !query schema +struct +-- !query output +2008-12-25 07:30:00 1931-01-07 00:30:00 NULL + + +-- !query +select TIMESTAMP_MILLIS(1230219000123),TIMESTAMP_MILLIS(-1230219000123),TIMESTAMP_MILLIS(null) +-- !query schema +struct +-- !query output +2008-12-25 07:30:00.123 1931-01-07 00:29:59.877 NULL + + +-- !query +select TIMESTAMP_MICROS(1230219000123123),TIMESTAMP_MICROS(-1230219000123123),TIMESTAMP_MICROS(null) +-- !query schema +struct +-- !query output +2008-12-25 07:30:00.123123 1931-01-07 00:29:59.876877 NULL + + +-- !query +select TIMESTAMP_SECONDS(1230219000123123) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select TIMESTAMP_SECONDS(-1230219000123123) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select TIMESTAMP_MILLIS(92233720368547758) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select TIMESTAMP_MILLIS(-92233720368547758) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow -- !query @@ -730,3 +790,51 @@ select to_timestamp("P2019-10-06T10:11:12", "'P'yyyy-MM-dd'T'HH:mm:ss") struct -- !query output 2019-10-06 10:11:12 + + +-- !query +select to_timestamp("16", "dd") +-- !query schema +struct +-- !query output +1970-01-16 00:00:00 + + +-- !query +select to_timestamp("02-29", "MM-dd") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_date("16", "dd") +-- !query schema +struct +-- !query output +1970-01-16 + + +-- !query +select to_date("02-29", "MM-dd") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("2019 40", "yyyy mm") +-- !query schema +struct +-- !query output +2019-01-01 00:40:00 + + +-- !query +select to_timestamp("2019 10:10:10", "yyyy hh:mm:ss") +-- !query schema +struct +-- !query output +2019-01-01 10:10:10 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index a4f5b3772d2d..4b879fcfbfc5 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,65 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 85 +-- Number of queries: 91 + + +-- !query +select 
TIMESTAMP_SECONDS(1230219000),TIMESTAMP_SECONDS(-1230219000),TIMESTAMP_SECONDS(null) +-- !query schema +struct +-- !query output +2008-12-25 07:30:00 1931-01-07 00:30:00 NULL + + +-- !query +select TIMESTAMP_MILLIS(1230219000123),TIMESTAMP_MILLIS(-1230219000123),TIMESTAMP_MILLIS(null) +-- !query schema +struct +-- !query output +2008-12-25 07:30:00.123 1931-01-07 00:29:59.877 NULL + + +-- !query +select TIMESTAMP_MICROS(1230219000123123),TIMESTAMP_MICROS(-1230219000123123),TIMESTAMP_MICROS(null) +-- !query schema +struct +-- !query output +2008-12-25 07:30:00.123123 1931-01-07 00:29:59.876877 NULL + + +-- !query +select TIMESTAMP_SECONDS(1230219000123123) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select TIMESTAMP_SECONDS(-1230219000123123) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select TIMESTAMP_MILLIS(92233720368547758) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select TIMESTAMP_MILLIS(-92233720368547758) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow -- !query @@ -702,3 +762,51 @@ select to_timestamp("P2019-10-06T10:11:12", "'P'yyyy-MM-dd'T'HH:mm:ss") struct -- !query output 2019-10-06 10:11:12 + + +-- !query +select to_timestamp("16", "dd") +-- !query schema +struct +-- !query output +1970-01-16 00:00:00 + + +-- !query +select to_timestamp("02-29", "MM-dd") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_date("16", "dd") +-- !query schema +struct +-- !query output +1970-01-16 + + +-- !query +select to_date("02-29", "MM-dd") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("2019 40", "yyyy mm") +-- !query schema +struct +-- !query output +2019-01-01 00:40:00 + + +-- !query +select to_timestamp("2019 10:10:10", "yyyy hh:mm:ss") +-- !query schema +struct +-- !query output +2019-01-01 10:10:10 diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index e85237f1c9f6..34a329627f5d 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 67 +-- Number of queries: 71 -- !query @@ -288,6 +288,49 @@ struct>> NULL +-- !query +select from_json('{"d": "2012-12-15", "t": "2012-12-15 15:15:15"}', 'd date, t timestamp') +-- !query schema +struct> +-- !query output +{"d":2012-12-15,"t":2012-12-15 15:15:15} + + +-- !query +select from_json( + '{"d": "12/15 2012", "t": "12/15 2012 15:15:15"}', + 'd date, t timestamp', + map('dateFormat', 'MM/dd yyyy', 'timestampFormat', 'MM/dd yyyy HH:mm:ss')) +-- !query schema +struct> +-- !query output +{"d":2012-12-15,"t":2012-12-15 15:15:15} + + +-- !query +select from_json( + '{"d": "02-29"}', + 'd date', + map('dateFormat', 'MM-dd')) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '02-29' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. 
+ + +-- !query +select from_json( + '{"t": "02-29"}', + 't timestamp', + map('timestampFormat', 'MM-dd')) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '02-29' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + -- !query select to_json(array('1', '2', '3')) -- !query schema
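The three new expressions added above share one mechanism: Spark stores `TimestampType` values as microseconds since the epoch, so `NumberToTimestampBase` multiplies the integral input by a per-unit scale factor (1,000,000 for seconds, 1,000 for milliseconds, 1 for microseconds) with an overflow-checked multiply. A minimal standalone sketch of that conversion, using only the JDK (the `NumberToMicros` object and its method names are made up for illustration, not part of the patch):

```scala
import java.time.{Instant, ZoneId}

// Illustrative stand-in for the upscaling done by NumberToTimestampBase (names are
// invented for this sketch). Spark's TimestampType is microseconds since the epoch.
object NumberToMicros {
  val MicrosPerSecond = 1000000L
  val MicrosPerMilli = 1000L

  // Math.multiplyExact throws ArithmeticException("long overflow") on overflow,
  // which is exactly what the new DateExpressionsSuite cases assert.
  def secondsToMicros(seconds: Long): Long = Math.multiplyExact(seconds, MicrosPerSecond)
  def millisToMicros(millis: Long): Long = Math.multiplyExact(millis, MicrosPerMilli)
  def microsToMicros(micros: Long): Long = micros // scale factor 1: no multiplication needed

  // Render a microsecond value in a given time zone, the way the golden files display it.
  def render(micros: Long, zone: ZoneId): String = {
    val instant = Instant.ofEpochSecond(
      Math.floorDiv(micros, MicrosPerSecond),
      Math.floorMod(micros, MicrosPerSecond) * 1000L)
    instant.atZone(zone).toLocalDateTime.toString
  }

  def main(args: Array[String]): Unit = {
    // 1230219000 s is 2008-12-25 15:30:00 UTC; at UTC-8 (the session time zone used for the
    // SQL golden files) it prints 2008-12-25T07:30, i.e. the "2008-12-25 07:30:00" above.
    println(render(secondsToMicros(1230219000L), ZoneId.of("America/Los_Angeles")))
  }
}
```

In SQL the same paths are reachable as `SELECT timestamp_seconds(1230219000)`, `timestamp_millis(...)` and `timestamp_micros(...)`, as the new `FunctionRegistry` entries and golden files show.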
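The `toLocalDate`/`toLocalTime` helpers replace the old `parseDefaulting` calls: `java.time` only answers `TemporalQueries.localDate()`/`localTime()` when every component was parsed, so the helpers fill in 1970 for the year, 1 for month and day, 0 for the time fields, and assume am when only `HOUR_OF_AMPM` is present. A self-contained sketch of the date half of that rule using plain `java.time` (the object and method names are illustrative, and the formatter below uses the JDK's default resolver rather than the STRICT one built in `toFormatter`):

```scala
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}

object MissingFieldDefaults {
  // Same defaulting rule as the patched toLocalDate: 1970-01-01 for absent date fields.
  def toLocalDateWithDefaults(accessor: TemporalAccessor): LocalDate = {
    val complete = accessor.query(TemporalQueries.localDate())
    if (complete != null) return complete // every date field was parsed
    def orDefault(field: ChronoField, default: Int): Int =
      if (accessor.isSupported(field)) accessor.get(field) else default
    LocalDate.of(
      orDefault(ChronoField.YEAR, 1970),
      orDefault(ChronoField.MONTH_OF_YEAR, 1),
      orDefault(ChronoField.DAY_OF_MONTH, 1))
  }

  def main(args: Array[String]): Unit = {
    // Pattern "dd" parses only the day of month, so TemporalQueries.localDate() returns null
    // and the defaults kick in.
    val parsed = DateTimeFormatter.ofPattern("dd").parse("16")
    println(toLocalDateWithDefaults(parsed)) // 1970-01-16, matching select to_timestamp("16", "dd")
  }
}
```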
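The reworked `checkDiffResult` keeps the same intent but catches the broader `DateTimeException` and drops the intermediate `Option`: retry the input with the legacy parser, rethrow the original error if the legacy parser also fails, and otherwise raise `SparkUpgradeException` so the user picks a `spark.sql.legacy.timeParserPolicy`. A condensed sketch of that control flow (the `EXCEPTION` policy check is omitted and `UpgradeError` is a stand-in for `SparkUpgradeException`):

```scala
import java.time.DateTimeException

object ParserFallback {
  // Stand-in for SparkUpgradeException, used only in this sketch.
  final class UpgradeError(msg: String, cause: Throwable) extends RuntimeException(msg, cause)

  def parseWithFallback[T](s: String, newParser: String => T, legacyParser: String => T): T = {
    try {
      newParser(s)
    } catch {
      case e: DateTimeException =>
        // Probe the legacy parser. If it fails too, surface the new parser's error;
        // if it succeeds, the string only became unparsable after the upgrade, so ask the
        // user to choose LEGACY or CORRECTED instead of silently choosing for them.
        try legacyParser(s) catch { case _: Throwable => throw e }
        throw new UpgradeError(s"Fail to parse '$s' in the new parser.", e)
    }
  }
}
```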