diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index fd4a48d2db33..474c45364336 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -173,9 +173,11 @@ function renderDagViz(forJob) { }); metadataContainer().selectAll(".barrier-rdd").each(function() { - var rddId = d3.select(this).text().trim(); - var clusterId = VizConstants.clusterPrefix + rddId; - svg.selectAll("g." + clusterId).classed("barrier", true) + var opId = d3.select(this).text().trim(); + var opClusterId = VizConstants.clusterPrefix + opId; + var stageId = $(this).parents(".stage-metadata").attr("stage-id"); + var stageClusterId = VizConstants.graphPrefix + stageId; + svg.selectAll("g[id=" + stageClusterId + "] g." + opClusterId).classed("barrier", true) }); resizeSvg(svg); diff --git a/core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala index 06b6483717a6..4b018f69b166 100644 --- a/core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala @@ -100,6 +100,34 @@ abstract class RealBrowserUISeleniumSuite(val driverProp: String) } } + test("SPARK-31886: Color barrier execution mode RDD correctly") { + withSpark(newSparkContext()) { sc => + sc.parallelize(1 to 10).barrier.mapPartitions(identity).repartition(1).collect() + + eventually(timeout(10.seconds), interval(50.milliseconds)) { + goToUi(sc, "/jobs/job/?id=0") + webDriver.findElement(By.id("job-dag-viz")).click() + + val stage0 = webDriver.findElement(By.cssSelector("g[id='graph_0']")) + val stage1 = webDriver.findElement(By.cssSelector("g[id='graph_1']")) + val barrieredOps = webDriver.findElements(By.className("barrier-rdd")).iterator() + + while (barrieredOps.hasNext) { + val barrieredOpId = barrieredOps.next().getAttribute("innerHTML") + val foundInStage0 = + stage0.findElements( + By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId)) + assert(foundInStage0.size === 1) + + val foundInStage1 = + stage1.findElements( + By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId)) + assert(foundInStage1.size === 0) + } + } + } + } + /** * Create a test SparkContext with the SparkUI enabled. * It is safe to `get` the SparkUI directly from the SparkContext returned here. diff --git a/docs/pyspark-migration-guide.md b/docs/pyspark-migration-guide.md index 6f0fbbfb78de..2c9ea410f217 100644 --- a/docs/pyspark-migration-guide.md +++ b/docs/pyspark-migration-guide.md @@ -45,6 +45,8 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide. - As of Spark 3.0, `Row` field names are no longer sorted alphabetically when constructing with named arguments for Python versions 3.6 and above, and the order of fields will match that as entered. To enable sorted fields by default, as in Spark 2.4, set the environment variable `PYSPARK_ROW_FIELD_SORTING_ENABLED` to `true` for both executors and driver - this environment variable must be consistent on all executors and driver; otherwise, it may cause failures or incorrect answers. For Python versions less than 3.6, the field names will be sorted alphabetically as the only option. 
+- In Spark 3.0, `pyspark.ml.param.shared.Has*` mixins do not provide any `set*(self, value)` setter methods anymore; use the respective `self.set(self.*, value)` instead. See [SPARK-29093](https://issues.apache.org/jira/browse/SPARK-29093) for details. + ## Upgrading from PySpark 2.3 to 2.4 - In PySpark, when Arrow optimization is enabled, previously `toPandas` just failed when Arrow optimization is unable to be used whereas `createDataFrame` from Pandas DataFrame allowed the fallback to non-optimization. Now, both `toPandas` and `createDataFrame` from Pandas DataFrame allow the fallback by default, which can be switched off by `spark.sql.execution.arrow.fallback.enabled`. diff --git a/docs/sql-ref-datetime-pattern.md b/docs/sql-ref-datetime-pattern.md index 865b9470ed19..3c0bc754f940 100644 --- a/docs/sql-ref-datetime-pattern.md +++ b/docs/sql-ref-datetime-pattern.md @@ -36,11 +36,7 @@ Spark uses pattern letters in the following table for date and timestamp parsing |**M/L**|month-of-year|month|7; 07; Jul; July| |**d**|day-of-month|number(3)|28| |**Q/q**|quarter-of-year|number/text|3; 03; Q3; 3rd quarter| -|**Y**|week-based-year|year|1996; 96| -|**w**|week-of-week-based-year|number(2)|27| -|**W**|week-of-month|number(1)|4| |**E**|day-of-week|text|Tue; Tuesday| -|**u**|localized day-of-week|number/text|2; 02; Tue; Tuesday| |**F**|week-of-month|number(1)|3| |**a**|am-pm-of-day|am-pm|PM| |**h**|clock-hour-of-am-pm (1-12)|number(2)|12| @@ -63,7 +59,7 @@ Spark uses pattern letters in the following table for date and timestamp parsing The count of pattern letters determines the format. -- Text: The text style is determined based on the number of pattern letters used. Less than 4 pattern letters will use the short form. Exactly 4 pattern letters will use the full form. Exactly 5 pattern letters will use the narrow form. 5 or more letters will fail. +- Text: The text style is determined based on the number of pattern letters used. Less than 4 pattern letters will use the short text form, typically an abbreviation, e.g. day-of-week Monday might output "Mon". Exactly 4 pattern letters will use the full text form, typically the full description, e.g. day-of-week Monday might output "Monday". 5 or more letters will fail. - Number(n): The n here represents the maximum count of letters this type of datetime pattern can be used. If the count of letters is one, then the value is output using the minimum number of digits and without padding. Otherwise, the count of digits is used as the width of the output field, with the value zero-padded as necessary. @@ -136,9 +132,5 @@ The count of pattern letters determines the format. During formatting, all valid data will be output even it is in the optional section. During parsing, the whole section may be missing from the parsed string. An optional section is started by `[` and ended using `]` (or at the end of the pattern). - -More details for the text style: - -- Short Form: Short text, typically an abbreviation. For example, day-of-week Monday might output "Mon". - -- Full Form: Full text, typically the full description. For example, day-of-week Monday might output "Monday". + +- Symbols of 'E', 'F', 'q' and 'Q' can only be used for datetime formatting, e.g. `date_format`. They are not allowed for datetime parsing, e.g. `to_timestamp`. 
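The new rule in `sql-ref-datetime-pattern.md` above is observable at the SQL level. A minimal sketch, not part of the patch, assuming a local SparkSession built from this branch: `date_format` still accepts the formatting-only letters, while `to_timestamp` is expected to reject them.

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: illustrates the documented formatting-vs-parsing split for 'E'.
val spark = SparkSession.builder().master("local[1]").getOrCreate()

// 'E' (day-of-week text) is fine for formatting output.
spark.sql("SELECT date_format(date '2018-11-17', 'E')").show()   // e.g. "Sat"

// The same letter used for parsing should fail with an exception pointing
// at the Spark 3.0 datetime parser upgrade.
spark.sql("SELECT to_timestamp('Sat', 'E')").collect()
```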
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index 27adc2372ec0..3fd704721a92 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -44,7 +44,7 @@ def __str__(self): debug_enabled = sql_conf.pysparkJVMStacktraceEnabled() desc = self.desc if debug_enabled: - desc = desc + "\nJVM stacktrace:\n%s" % self.stackTrace + desc = desc + "\n\nJVM stacktrace:\n%s" % self.stackTrace # encode unicode instance for python2 for human readable description if sys.version_info.major < 3 and isinstance(desc, unicode): return str(desc.encode('utf-8')) diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index b76d64bb687f..503540403f5e 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -186,6 +186,9 @@ hadoop-2.7 + + true + com.amazonaws diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 68edf851bf2a..9c97e1e9b441 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1015,7 +1015,7 @@ class Analyzer( private def lookupRelation(identifier: Seq[String]): Option[LogicalPlan] = { expandRelationName(identifier) match { case SessionCatalogAndIdentifier(catalog, ident) => - def loaded = CatalogV2Util.loadTable(catalog, ident).map { + lazy val loaded = CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table => v1SessionCatalog.getRelation(v1Table.v1Table) case table => @@ -1024,7 +1024,12 @@ class Analyzer( DataSourceV2Relation.create(table, Some(catalog), Some(ident))) } val key = catalog.name +: ident.namespace :+ ident.name - Option(AnalysisContext.get.relationCache.getOrElseUpdate(key, loaded.orNull)) + AnalysisContext.get.relationCache.get(key).map(_.transform { + case multi: MultiInstanceRelation => multi.newInstance() + }).orElse { + loaded.foreach(AnalysisContext.get.relationCache.update(key, _)) + loaded + } case _ => None } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index d02776b5d86f..4e63ee7428d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -525,7 +525,7 @@ object CatalogColumnStat extends Logging { TimestampFormatter( format = "yyyy-MM-dd HH:mm:ss.SSSSSS", zoneId = ZoneOffset.UTC, - needVarLengthSecondFraction = isParsing) + isParsing = isParsing) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index a7c243537acb..f0df18da8eed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -35,7 +35,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { options.zoneId, options.locale, legacyFormat = FAST_DATE_FORMAT, - needVarLengthSecondFraction = true) + isParsing = true) private val decimalParser = if (options.locale == Locale.US) { // Special handling the default locale for 
backward compatibility diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index 4990da2bf379..a3ee129cd6d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -47,12 +47,13 @@ class UnivocityGenerator( options.zoneId, options.locale, legacyFormat = FAST_DATE_FORMAT, - needVarLengthSecondFraction = false) + isParsing = false) private val dateFormatter = DateFormatter( options.dateFormat, options.zoneId, options.locale, - legacyFormat = FAST_DATE_FORMAT) + legacyFormat = FAST_DATE_FORMAT, + isParsing = false) private def makeConverter(dataType: DataType): ValueConverter = dataType match { case DateType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index f2bb7db895ca..3898eca79478 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -90,12 +90,13 @@ class UnivocityParser( options.zoneId, options.locale, legacyFormat = FAST_DATE_FORMAT, - needVarLengthSecondFraction = true) + isParsing = true) private lazy val dateFormatter = DateFormatter( options.dateFormat, options.zoneId, options.locale, - legacyFormat = FAST_DATE_FORMAT) + legacyFormat = FAST_DATE_FORMAT, + isParsing = true) private val csvFilters = new CSVFilters(filters, requiredSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 4f3db1b8a57c..c5ead9412a43 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -734,7 +734,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti format.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction = false) + isParsing = false) } } else None } @@ -745,7 +745,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti format.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction = false) + isParsing = false) } else { formatter.get } @@ -890,7 +890,7 @@ abstract class ToTimestamp constFormat.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction = true) + isParsing = true) } catch { case e: SparkUpgradeException => throw e case NonFatal(_) => null @@ -929,7 +929,7 @@ abstract class ToTimestamp formatString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction = true) + isParsing = true) .parse(t.asInstanceOf[UTF8String].toString) / downScaleFactor } catch { case e: SparkUpgradeException => throw e @@ -1072,7 +1072,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ constFormat.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction = false) + isParsing = false) } catch { case e: SparkUpgradeException => throw e case NonFatal(_) => null @@ -1105,7 +1105,7 @@ case class FromUnixTime(sec: Expression, format: 
Expression, timeZoneId: Option[ f.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction = false) + isParsing = false) .format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) } catch { case e: SparkUpgradeException => throw e diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala index f3938feef0a3..fb0ca323af1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala @@ -83,12 +83,13 @@ private[sql] class JacksonGenerator( options.zoneId, options.locale, legacyFormat = FAST_DATE_FORMAT, - needVarLengthSecondFraction = false) + isParsing = false) private val dateFormatter = DateFormatter( options.dateFormat, options.zoneId, options.locale, - legacyFormat = FAST_DATE_FORMAT) + legacyFormat = FAST_DATE_FORMAT, + isParsing = false) private def makeWriter(dataType: DataType): ValueWriter = dataType match { case NullType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index c4f612172349..e038f777c7a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -61,12 +61,13 @@ class JacksonParser( options.zoneId, options.locale, legacyFormat = FAST_DATE_FORMAT, - needVarLengthSecondFraction = true) + isParsing = true) private lazy val dateFormatter = DateFormatter( options.dateFormat, options.zoneId, options.locale, - legacyFormat = FAST_DATE_FORMAT) + legacyFormat = FAST_DATE_FORMAT, + isParsing = true) /** * Create a converter which converts the JSON documents held by the `JsonParser` diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 56b12784fd21..de396a4c6345 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -43,7 +43,7 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { options.zoneId, options.locale, legacyFormat = FAST_DATE_FORMAT, - needVarLengthSecondFraction = true) + isParsing = true) /** * Infer the type of a collection of json records in three stages: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index fe20e546f5d2..b611ffa198b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.util import java.text.SimpleDateFormat import java.time.{LocalDate, ZoneId} -import java.time.format.DateTimeFormatter import java.util.{Date, Locale} import org.apache.commons.lang3.time.FastDateFormat @@ -42,11 +41,12 @@ class Iso8601DateFormatter( pattern: String, zoneId: ZoneId, locale: Locale, - legacyFormat: LegacyDateFormats.LegacyDateFormat) + legacyFormat: LegacyDateFormats.LegacyDateFormat, + isParsing: Boolean) extends 
DateFormatter with DateTimeFormatterHelper { @transient - private lazy val formatter = getOrCreateFormatter(pattern, locale) + private lazy val formatter = getOrCreateFormatter(pattern, locale, isParsing) @transient private lazy val legacyFormatter = DateFormatter.getLegacyFormatter( @@ -117,13 +117,7 @@ class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends LegacyD object DateFormatter { import LegacyDateFormats._ - /** - * Before Spark 3.0, the first day-of-week is always Monday. Since Spark 3.0, it depends on the - * locale. - * We pick GB as the default locale instead of US, to be compatible with Spark 2.x, as US locale - * uses Sunday as the first day-of-week. See SPARK-31879. - */ - val defaultLocale: Locale = new Locale("en", "GB") + val defaultLocale: Locale = Locale.US val defaultPattern: String = "yyyy-MM-dd" @@ -131,12 +125,13 @@ object DateFormatter { format: Option[String], zoneId: ZoneId, locale: Locale = defaultLocale, - legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT): DateFormatter = { + legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT, + isParsing: Boolean): DateFormatter = { val pattern = format.getOrElse(defaultPattern) if (SQLConf.get.legacyTimeParserPolicy == LEGACY) { getLegacyFormatter(pattern, zoneId, locale, legacyFormat) } else { - val df = new Iso8601DateFormatter(pattern, zoneId, locale, legacyFormat) + val df = new Iso8601DateFormatter(pattern, zoneId, locale, legacyFormat, isParsing) df.validatePatternString() df } @@ -159,15 +154,16 @@ object DateFormatter { format: String, zoneId: ZoneId, locale: Locale, - legacyFormat: LegacyDateFormat): DateFormatter = { - getFormatter(Some(format), zoneId, locale, legacyFormat) + legacyFormat: LegacyDateFormat, + isParsing: Boolean): DateFormatter = { + getFormatter(Some(format), zoneId, locale, legacyFormat, isParsing) } - def apply(format: String, zoneId: ZoneId): DateFormatter = { - getFormatter(Some(format), zoneId) + def apply(format: String, zoneId: ZoneId, isParsing: Boolean = false): DateFormatter = { + getFormatter(Some(format), zoneId, isParsing = isParsing) } def apply(zoneId: ZoneId): DateFormatter = { - getFormatter(None, zoneId) + getFormatter(None, zoneId, isParsing = false) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 5b9d8396530b..8e5c8651c8c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -62,7 +62,15 @@ trait DateTimeFormatterHelper { accessor.get(ChronoField.HOUR_OF_DAY) } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) { // When we reach here, it means am/pm is not specified. Here we assume it's am. + // All of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) will + // be resolved to HOUR_OF_AMPM here, we do not need to handle them separately accessor.get(ChronoField.HOUR_OF_AMPM) + } else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) && + accessor.get(ChronoField.AMPM_OF_DAY) == 1) { + // When reach here, the `hour` part is missing, and PM is specified. 
+ // None of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) is + // specified + 12 } else { 0 } @@ -89,9 +97,9 @@ trait DateTimeFormatterHelper { protected def getOrCreateFormatter( pattern: String, locale: Locale, - needVarLengthSecondFraction: Boolean = false): DateTimeFormatter = { - val newPattern = convertIncompatiblePattern(pattern) - val useVarLen = needVarLengthSecondFraction && newPattern.contains('S') + isParsing: Boolean): DateTimeFormatter = { + val newPattern = convertIncompatiblePattern(pattern, isParsing) + val useVarLen = isParsing && newPattern.contains('S') val key = (newPattern, locale, useVarLen) var formatter = cache.getIfPresent(key) if (formatter == null) { @@ -226,16 +234,27 @@ private object DateTimeFormatterHelper { val formatter = DateTimeFormatter.ofPattern("LLL qqq", Locale.US) formatter.format(LocalDate.of(2000, 1, 1)) == "1 1" } - final val unsupportedLetters = Set('A', 'c', 'e', 'n', 'N', 'p') + // SPARK-31892: The week-based date fields are rarely used and really confusing for parsing values + // to datetime, especially when they are mixed with other non-week-based ones; + // SPARK-31879: It's also difficult for us to restore the behavior of week-based date fields + // formatting, in DateTimeFormatter the first day of week for week-based date fields become + // localized, for the default Locale.US, it uses Sunday as the first day of week, while in Spark + // 2.4, the SimpleDateFormat uses Monday as the first day of week. + final val weekBasedLetters = Set('Y', 'W', 'w', 'u', 'e', 'c') + final val unsupportedLetters = Set('A', 'n', 'N', 'p') + // The quarter fields will also be parsed strangely, e.g. when the pattern contains `yMd` and can + // be directly resolved then the `q` do check for whether the month is valid, but if the date + // fields is incomplete, e.g. `yM`, the checking will be bypassed. + final val unsupportedLettersForParsing = Set('E', 'F', 'q', 'Q') final val unsupportedPatternLengths = { // SPARK-31771: Disable Narrow-form TextStyle to avoid silent data change, as it is Full-form in // 2.4 - Seq("G", "M", "L", "E", "u", "Q", "q").map(_ * 5) ++ + Seq("G", "M", "L", "E", "Q", "q").map(_ * 5) ++ // SPARK-31867: Disable year pattern longer than 10 which will cause Java time library throw // unchecked `ArrayIndexOutOfBoundsException` by the `NumberPrinterParser` for formatting. It // makes the call side difficult to handle exceptions and easily leads to silent data change // because of the exceptions being suppressed. - Seq("y", "Y").map(_ * 11) + Seq("y").map(_ * 11) }.toSet /** @@ -246,7 +265,7 @@ private object DateTimeFormatterHelper { * @param pattern The input pattern. * @return The pattern for new parser */ - def convertIncompatiblePattern(pattern: String): String = { + def convertIncompatiblePattern(pattern: String, isParsing: Boolean): String = { val eraDesignatorContained = pattern.split("'").zipWithIndex.exists { case (patternPart, index) => // Text can be quoted using single quotes, we only check the non-quote parts. 
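As a quick illustration of what the new `weekBasedLetters` / `unsupportedLettersForParsing` sets enforce, here is a hedged sketch written the way `DateTimeFormatterHelperSuite` exercises the helper (the object is package-private, so this only compiles from `org.apache.spark.sql.catalyst.util`); the wrapper object name is made up for the example.

```scala
package org.apache.spark.sql.catalyst.util

import scala.util.Try

// Sketch only: expected behaviour of convertIncompatiblePattern after this change.
object ConvertIncompatiblePatternExamples {
  import DateTimeFormatterHelper.convertIncompatiblePattern

  def main(args: Array[String]): Unit = {
    // 'y' is still rewritten to 'u' outside quoted text.
    assert(convertIncompatiblePattern("yyyy-MM-dd", isParsing = true) == "uuuu-MM-dd")
    // Week-based letters are rejected for both parsing and formatting.
    assert(Try(convertIncompatiblePattern("YYYY-ww", isParsing = false)).isFailure)
    // 'E' is rejected only when the pattern will be used for parsing.
    assert(Try(convertIncompatiblePattern("dd-EEE", isParsing = true)).isFailure)
    assert(convertIncompatiblePattern("dd-EEE", isParsing = false) == "dd-EEE")
  }
}
```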
@@ -255,7 +274,12 @@ private object DateTimeFormatterHelper { (pattern + " ").split("'").zipWithIndex.map { case (patternPart, index) => if (index % 2 == 0) { - for (c <- patternPart if unsupportedLetters.contains(c)) { + for (c <- patternPart if weekBasedLetters.contains(c)) { + throw new IllegalArgumentException(s"All week-based patterns are unsupported since" + + s" Spark 3.0, detected: $c, Please use the SQL function EXTRACT instead") + } + for (c <- patternPart if unsupportedLetters.contains(c) || + (isParsing && unsupportedLettersForParsing.contains(c))) { throw new IllegalArgumentException(s"Illegal pattern character: $c") } for (style <- unsupportedPatternLengths if patternPart.contains(style)) { @@ -267,20 +291,13 @@ private object DateTimeFormatterHelper { "or upgrade your Java version. For more details, please read " + "https://bugs.openjdk.java.net/browse/JDK-8114833") } - // The meaning of 'u' was day number of week in SimpleDateFormat, it was changed to year - // in DateTimeFormatter. Substitute 'u' to 'e' and use DateTimeFormatter to parse the - // string. If parsable, return the result; otherwise, fall back to 'u', and then use the - // legacy SimpleDateFormat parser to parse. When it is successfully parsed, throw an - // exception and ask users to change the pattern strings or turn on the legacy mode; - // otherwise, return NULL as what Spark 2.4 does. - val res = patternPart.replace("u", "e") // In DateTimeFormatter, 'u' supports negative years. We substitute 'y' to 'u' here for // keeping the support in Spark 3.0. If parse failed in Spark 3.0, fall back to 'y'. // We only do this substitution when there is no era designator found in the pattern. if (!eraDesignatorContained) { - res.replace("y", "u") + patternPart.replace("y", "u") } else { - res + patternPart } } else { patternPart diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 1f14c70164c1..11dcdec7356f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -62,11 +62,11 @@ class Iso8601TimestampFormatter( zoneId: ZoneId, locale: Locale, legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction: Boolean) + isParsing: Boolean) extends TimestampFormatter with DateTimeFormatterHelper { @transient protected lazy val formatter: DateTimeFormatter = - getOrCreateFormatter(pattern, locale, needVarLengthSecondFraction) + getOrCreateFormatter(pattern, locale, isParsing) @transient protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter( @@ -122,7 +122,7 @@ class FractionTimestampFormatter(zoneId: ZoneId) zoneId, TimestampFormatter.defaultLocale, LegacyDateFormats.FAST_DATE_FORMAT, - needVarLengthSecondFraction = false) { + isParsing = false) { @transient override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter @@ -278,13 +278,7 @@ object LegacyDateFormats extends Enumeration { object TimestampFormatter { import LegacyDateFormats._ - /** - * Before Spark 3.0, the first day-of-week is always Monday. Since Spark 3.0, it depends on the - * locale. - * We pick GB as the default locale instead of US, to be compatible with Spark 2.x, as US locale - * uses Sunday as the first day-of-week. See SPARK-31879. 
- */ - val defaultLocale: Locale = new Locale("en", "GB") + val defaultLocale: Locale = Locale.US def defaultPattern(): String = s"${DateFormatter.defaultPattern} HH:mm:ss" @@ -293,13 +287,13 @@ object TimestampFormatter { zoneId: ZoneId, locale: Locale = defaultLocale, legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT, - needVarLengthSecondFraction: Boolean = false): TimestampFormatter = { + isParsing: Boolean): TimestampFormatter = { val pattern = format.getOrElse(defaultPattern) if (SQLConf.get.legacyTimeParserPolicy == LEGACY) { getLegacyFormatter(pattern, zoneId, locale, legacyFormat) } else { val tf = new Iso8601TimestampFormatter( - pattern, zoneId, locale, legacyFormat, needVarLengthSecondFraction) + pattern, zoneId, locale, legacyFormat, isParsing) tf.validatePatternString() tf } @@ -325,27 +319,27 @@ object TimestampFormatter { zoneId: ZoneId, locale: Locale, legacyFormat: LegacyDateFormat, - needVarLengthSecondFraction: Boolean): TimestampFormatter = { - getFormatter(Some(format), zoneId, locale, legacyFormat, needVarLengthSecondFraction) + isParsing: Boolean): TimestampFormatter = { + getFormatter(Some(format), zoneId, locale, legacyFormat, isParsing) } def apply( format: String, zoneId: ZoneId, legacyFormat: LegacyDateFormat, - needVarLengthSecondFraction: Boolean): TimestampFormatter = { - getFormatter(Some(format), zoneId, defaultLocale, legacyFormat, needVarLengthSecondFraction) + isParsing: Boolean): TimestampFormatter = { + getFormatter(Some(format), zoneId, defaultLocale, legacyFormat, isParsing) } def apply( format: String, zoneId: ZoneId, - needVarLengthSecondFraction: Boolean = false): TimestampFormatter = { - getFormatter(Some(format), zoneId, needVarLengthSecondFraction = needVarLengthSecondFraction) + isParsing: Boolean): TimestampFormatter = { + getFormatter(Some(format), zoneId, isParsing = isParsing) } def apply(zoneId: ZoneId): TimestampFormatter = { - getFormatter(None, zoneId) + getFormatter(None, zoneId, isParsing = false) } def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 1ca7380ead41..2dc5990eb610 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -41,7 +41,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { private val JST_OPT = Option(JST.getId) def toMillis(timestamp: String): Long = { - val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC) + val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = true) DateTimeUtils.microsToMillis(tf.parse(timestamp)) } val date = "2015-04-08 13:10:15" @@ -1168,4 +1168,43 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkExceptionInExpression[ArithmeticException]( MillisToTimestamp(Literal(-92233720368547758L)), "long overflow") } + + test("Disable week-based date fields and quarter fields for parsing") { + + def checkSparkUpgrade(c: Char): Unit = { + checkExceptionInExpression[SparkUpgradeException]( + new ParseToTimestamp(Literal("1"), Literal(c.toString)).child, "3.0") + checkExceptionInExpression[SparkUpgradeException]( + new ParseToDate(Literal("1"), Literal(c.toString)).child, "3.0") + checkExceptionInExpression[SparkUpgradeException]( + 
ToUnixTimestamp(Literal("1"), Literal(c.toString)), "3.0") + checkExceptionInExpression[SparkUpgradeException]( + UnixTimestamp(Literal("1"), Literal(c.toString)), "3.0") + } + + def checkNullify(c: Char): Unit = { + checkEvaluation(new ParseToTimestamp(Literal("1"), Literal(c.toString)).child, null) + checkEvaluation(new ParseToDate(Literal("1"), Literal(c.toString)).child, null) + checkEvaluation(ToUnixTimestamp(Literal("1"), Literal(c.toString)), null) + checkEvaluation(UnixTimestamp(Literal("1"), Literal(c.toString)), null) + } + + Seq('Y', 'W', 'w', 'E', 'u', 'F').foreach { l => + checkSparkUpgrade(l) + } + + Seq('q', 'Q').foreach { l => + checkNullify(l) + } + } + + + test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") { + checkEvaluation( + new ParseToTimestamp(Literal("PM"), Literal("a")).child, + Timestamp.valueOf("1970-01-01 12:00:00.0")) + checkEvaluation( + new ParseToTimestamp(Literal("11:11 PM"), Literal("mm:ss a")).child, + Timestamp.valueOf("1970-01-01 12:11:11.0")) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala similarity index 93% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala index 7d503cc09117..4892deae92b3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala @@ -15,19 +15,22 @@ * limitations under the License. */ -package org.apache.spark.sql.util +package org.apache.spark.sql.catalyst.util import java.time.{DateTimeException, LocalDate} -import org.apache.spark.{SparkFunSuite, SparkUpgradeException} -import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.catalyst.util.{DateFormatter, LegacyDateFormats} +import org.apache.spark.SparkUpgradeException import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy -class DateFormatterSuite extends SparkFunSuite with SQLHelper { +class DateFormatterSuite extends DatetimeFormatterSuite { + + override def checkFormatterCreation(pattern: String, isParsing: Boolean): Unit = { + DateFormatter(pattern, UTC, isParsing) + } + test("parsing dates") { outstandingTimezonesIds.foreach { timeZone => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { @@ -72,7 +75,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { DateFormatter.defaultPattern, getZoneId(timeZone), DateFormatter.defaultLocale, - legacyFormat) + legacyFormat, + isParsing = false) val days = formatter.parse(date) assert(date === formatter.format(days)) assert(date === formatter.format(daysToLocalDate(days))) @@ -106,7 +110,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { DateFormatter.defaultPattern, getZoneId(timeZone), DateFormatter.defaultLocale, - legacyFormat) + legacyFormat, + isParsing = false) val date = formatter.format(days) val parsed = formatter.parse(date) assert(days === parsed) @@ -173,7 +178,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { DateFormatter.defaultPattern, getZoneId(timeZone), DateFormatter.defaultLocale, - legacyFormat) + legacyFormat, + isParsing = false) 
assert(LocalDate.ofEpochDay(formatter.parse("1000-01-01")) === LocalDate.of(1000, 1, 1)) assert(formatter.format(LocalDate.of(1000, 1, 1)) === "1000-01-01") assert(formatter.format(localDateToDays(LocalDate.of(1000, 1, 1))) === "1000-01-01") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala index 18acaafc6d91..0b15e49af021 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala @@ -17,29 +17,41 @@ package org.apache.spark.sql.catalyst.util -import org.apache.spark.{SparkFunSuite, SparkUpgradeException} +import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._ class DateTimeFormatterHelperSuite extends SparkFunSuite { + private def convertIncompatiblePattern(pattern: String): String = { + DateTimeFormatterHelper.convertIncompatiblePattern(pattern, isParsing = false) + } + test("check incompatible pattern") { - assert(convertIncompatiblePattern("MM-DD-u") === "MM-DD-e") assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz") === "uuuu-MM-dd'T'HH:mm:ss.SSSz") assert(convertIncompatiblePattern("yyyy-MM'y contains in quoted text'HH:mm:ss") === "uuuu-MM'y contains in quoted text'HH:mm:ss") - assert(convertIncompatiblePattern("yyyy-MM-dd-u'T'HH:mm:ss.SSSz") - === "uuuu-MM-dd-e'T'HH:mm:ss.SSSz") assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted text'HH:mm:ss") === "uuuu-MM'u contains in quoted text'HH:mm:ss") assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted text'''''HH:mm:ss") === "uuuu-MM'u contains in quoted text'''''HH:mm:ss") assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz G") === "yyyy-MM-dd'T'HH:mm:ss.SSSz G") + weekBasedLetters.foreach { l => + val e = intercept[IllegalArgumentException](convertIncompatiblePattern(s"yyyy-MM-dd $l G")) + assert(e.getMessage.contains("week-based")) + } unsupportedLetters.foreach { l => val e = intercept[IllegalArgumentException](convertIncompatiblePattern(s"yyyy-MM-dd $l G")) assert(e.getMessage === s"Illegal pattern character: $l") } + unsupportedLettersForParsing.foreach { l => + val e = intercept[IllegalArgumentException] { + DateTimeFormatterHelper.convertIncompatiblePattern(s"$l", isParsing = true) + } + assert(e.getMessage === s"Illegal pattern character: $l") + assert(convertIncompatiblePattern(s"$l").nonEmpty) + } unsupportedPatternLengths.foreach { style => val e1 = intercept[IllegalArgumentException] { convertIncompatiblePattern(s"yyyy-MM-dd $style") @@ -50,7 +62,6 @@ class DateTimeFormatterHelperSuite extends SparkFunSuite { } assert(e2.getMessage === s"Too many pattern letters: ${style.head}") } - assert(convertIncompatiblePattern("yyyy-MM-dd uuuu") === "uuuu-MM-dd eeee") assert(convertIncompatiblePattern("yyyy-MM-dd EEEE") === "uuuu-MM-dd EEEE") assert(convertIncompatiblePattern("yyyy-MM-dd'e'HH:mm:ss") === "uuuu-MM-dd'e'HH:mm:ss") assert(convertIncompatiblePattern("yyyy-MM-dd'T'") === "uuuu-MM-dd'T'") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala new file mode 100644 index 000000000000..31ff50fda1ad --- /dev/null +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import org.scalatest.Matchers + +import org.apache.spark.{SparkFunSuite, SparkUpgradeException} +import org.apache.spark.sql.catalyst.plans.SQLHelper + +trait DatetimeFormatterSuite extends SparkFunSuite with SQLHelper with Matchers { + import DateTimeFormatterHelper._ + def checkFormatterCreation(pattern: String, isParsing: Boolean): Unit + + test("explicitly forbidden datetime patterns") { + + Seq(true, false).foreach { isParsing => + // not support by the legacy one too + val unsupportedBoth = Seq("QQQQQ", "qqqqq", "eeeee", "A", "c", "n", "N", "p", "e") + unsupportedBoth.foreach { pattern => + intercept[IllegalArgumentException](checkFormatterCreation(pattern, isParsing)) + } + // supported by the legacy one, then we will suggest users with SparkUpgradeException + ((weekBasedLetters ++ unsupportedLetters).map(_.toString) + ++ unsupportedPatternLengths -- unsupportedBoth).foreach { + pattern => intercept[SparkUpgradeException](checkFormatterCreation(pattern, isParsing)) + } + } + + // not support by the legacy one too + val unsupportedBoth = Seq("q", "Q") + unsupportedBoth.foreach { pattern => + intercept[IllegalArgumentException](checkFormatterCreation(pattern, true)) + } + // supported by the legacy one, then we will suggest users with SparkUpgradeException + (unsupportedLettersForParsing.map(_.toString) -- unsupportedBoth).foreach { + pattern => intercept[SparkUpgradeException](checkFormatterCreation(pattern, true)) + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala similarity index 86% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index aeb238e75f02..02333a3eb9fc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -15,23 +15,23 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.util +package org.apache.spark.sql.catalyst.util import java.time.{DateTimeException, Instant, LocalDateTime, LocalTime} import java.util.concurrent.TimeUnit -import org.scalatest.Matchers - -import org.apache.spark.{SparkFunSuite, SparkUpgradeException} -import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.catalyst.util.{LegacyDateFormats, TimestampFormatter} +import org.apache.spark.SparkUpgradeException import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy import org.apache.spark.unsafe.types.UTF8String -class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers { +class TimestampFormatterSuite extends DatetimeFormatterSuite { + + override def checkFormatterCreation(pattern: String, isParsing: Boolean): Unit = { + TimestampFormatter(pattern, UTC, isParsing) + } test("parsing timestamps using time zones") { val localDate = "2018-12-02T10:11:12.001234" @@ -48,7 +48,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers val formatter = TimestampFormatter( "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", getZoneId(zoneId), - needVarLengthSecondFraction = true) + isParsing = true) val microsSinceEpoch = formatter.parse(localDate) assert(microsSinceEpoch === expectedMicros(zoneId)) } @@ -73,7 +73,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers // Test only FAST_DATE_FORMAT because other legacy formats don't support formatting // in microsecond precision. LegacyDateFormats.FAST_DATE_FORMAT, - needVarLengthSecondFraction = false), + isParsing = false), TimestampFormatter.getFractionFormatter(getZoneId(zoneId))).foreach { formatter => val timestamp = formatter.format(microsSinceEpoch) assert(timestamp === expectedTimestamp(zoneId)) @@ -96,9 +96,9 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers 2177456523456789L, 11858049903010203L).foreach { micros => outstandingZoneIds.foreach { zoneId => - val timestamp = TimestampFormatter(pattern, zoneId).format(micros) + val timestamp = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros) val parsed = TimestampFormatter( - pattern, zoneId, needVarLengthSecondFraction = true).parse(timestamp) + pattern, zoneId, isParsing = true).parse(timestamp) assert(micros === parsed) } } @@ -119,15 +119,15 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers outstandingZoneIds.foreach { zoneId => val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS" val micros = TimestampFormatter( - pattern, zoneId, needVarLengthSecondFraction = true).parse(timestamp) - val formatted = TimestampFormatter(pattern, zoneId).format(micros) + pattern, zoneId, isParsing = true).parse(timestamp) + val formatted = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros) assert(timestamp === formatted) } } } test("case insensitive parsing of am and pm") { - val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC) + val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC, isParsing = false) val micros = formatter.parse("2009 Mar 20 11:30:01 am") assert(micros === date(2009, 3, 20, 11, 30, 1)) } @@ -157,8 +157,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers assert(TimestampFormatter(UTC).format(micros) === "-0099-01-01 00:00:00") 
assert(TimestampFormatter(UTC).format(instant) === "-0099-01-01 00:00:00") withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone - assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC).format(toJavaTimestamp(micros)) - === "0100-01-01 00:00:00 BC") + assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC, isParsing = false) + .format(toJavaTimestamp(micros)) === "0100-01-01 00:00:00 BC") } } @@ -185,7 +185,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers test("parsing timestamp strings with various seconds fractions") { outstandingZoneIds.foreach { zoneId => def check(pattern: String, input: String, reference: String): Unit = { - val formatter = TimestampFormatter(pattern, zoneId, needVarLengthSecondFraction = true) + val formatter = TimestampFormatter(pattern, zoneId, isParsing = true) val expected = stringToTimestamp(UTF8String.fromString(reference), zoneId).get val actual = formatter.parse(input) assert(actual === expected) @@ -209,7 +209,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers "2019-10-14T09:39:07.1", "2019-10-14T09:39:07.1") try { - TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, true) + TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, isParsing = true) .parse("2019/11/14 20#25#30.123456") fail("Expected to throw an exception for the invalid input") } catch { @@ -222,7 +222,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers test("formatting timestamp strings up to microsecond precision") { outstandingZoneIds.foreach { zoneId => def check(pattern: String, input: String, expected: String): Unit = { - val formatter = TimestampFormatter(pattern, zoneId) + val formatter = TimestampFormatter(pattern, zoneId, isParsing = false) val timestamp = stringToTimestamp(UTF8String.fromString(input), zoneId).get val actual = formatter.format(timestamp) assert(actual === expected) @@ -259,7 +259,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } test("SPARK-30958: parse timestamp with negative year") { - val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, true) + val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = true) assert(formatter1.parse("-1234-02-22 02:22:22") === date(-1234, 2, 22, 2, 22, 22)) def assertParsingError(f: => Unit): Unit = { @@ -272,7 +272,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } // "yyyy" with "G" can't parse negative year or year 0000. - val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, true) + val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, isParsing = true) assertParsingError(formatter2.parse("BC -1234-02-22 02:22:22")) assertParsingError(formatter2.parse("AC 0000-02-22 02:22:22")) @@ -292,7 +292,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers zoneId, TimestampFormatter.defaultLocale, legacyFormat, - needVarLengthSecondFraction = false) + isParsing = false) }.toSeq :+ TimestampFormatter.getFractionFormatter(zoneId) formatters.foreach { formatter => assert(microsToInstant(formatter.parse("1000-01-01 01:02:03")) @@ -318,7 +318,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers test("parsing hour with various patterns") { def createFormatter(pattern: String): TimestampFormatter = { // Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid value range. 
- TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false) + TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = true) } withClue("HH") { @@ -377,38 +377,45 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } test("missing date fields") { - val formatter = TimestampFormatter("HH:mm:ss", UTC) + val formatter = TimestampFormatter("HH:mm:ss", UTC, isParsing = true) val micros = formatter.parse("11:30:01") assert(micros === date(1970, 1, 1, 11, 30, 1)) } test("missing year field with invalid date") { // Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid date. - val formatter = TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false) + val formatter = + TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = true) withDefaultTimeZone(UTC)(intercept[DateTimeException](formatter.parse("02-29"))) } test("missing am/pm field") { - val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC) - val micros = formatter.parse("2009 11:30:01") - assert(micros === date(2009, 1, 1, 11, 30, 1)) + Seq("HH", "hh", "KK", "kk").foreach { hour => + val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC, isParsing = true) + val micros = formatter.parse("2009 11:30:01") + assert(micros === date(2009, 1, 1, 11, 30, 1)) + } } test("missing time fields") { - val formatter = TimestampFormatter("yyyy HH", UTC) + val formatter = TimestampFormatter("yyyy HH", UTC, isParsing = true) val micros = formatter.parse("2009 11") assert(micros === date(2009, 1, 1, 11)) } - test("explicitly forbidden datetime patterns") { - // not support by the legacy one too - Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern => - intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC).format(0)) - } - // supported by the legacy one, then we will suggest users with SparkUpgradeException - Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "y" * 11) - .foreach { pattern => - intercept[SparkUpgradeException](TimestampFormatter(pattern, UTC).format(0)) - } + test("missing hour field") { + val f1 = TimestampFormatter("mm:ss a", UTC, isParsing = true) + val t1 = f1.parse("30:01 PM") + assert(t1 === date(1970, 1, 1, 12, 30, 1)) + val t2 = f1.parse("30:01 AM") + assert(t2 === date(1970, 1, 1, 0, 30, 1)) + val f2 = TimestampFormatter("mm:ss", UTC, isParsing = true) + val t3 = f2.parse("30:01") + assert(t3 === date(1970, 1, 1, 0, 30, 1)) + val f3 = TimestampFormatter("a", UTC, isParsing = true) + val t4 = f3.parse("PM") + assert(t4 === date(1970, 1, 1, 12)) + val t5 = f3.parse("AM") + assert(t5 === date(1970)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 12160c9f4c19..ae8d33d8558b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3509,8 +3509,8 @@ class Dataset[T] private[sql]( private[sql] def collectAsArrowToR(): Array[Any] = { val timeZoneId = sparkSession.sessionState.conf.sessionLocalTimeZone - withAction("collectAsArrowToR", queryExecution) { plan => - RRDD.serveToStream("serve-Arrow") { outputStream => + RRDD.serveToStream("serve-Arrow") { outputStream => + withAction("collectAsArrowToR", queryExecution) { plan => val buffer = new ByteArrayOutputStream() val out = new DataOutputStream(outputStream) val batchWriter = new ArrowBatchStreamWriter(schema, 
buffer, timeZoneId) @@ -3563,8 +3563,8 @@ class Dataset[T] private[sql]( private[sql] def collectAsArrowToPython: Array[Any] = { val timeZoneId = sparkSession.sessionState.conf.sessionLocalTimeZone - withAction("collectAsArrowToPython", queryExecution) { plan => - PythonRDD.serveToStream("serve-Arrow") { outputStream => + PythonRDD.serveToStream("serve-Arrow") { outputStream => + withAction("collectAsArrowToPython", queryExecution) { plan => val out = new DataOutputStream(outputStream) val batchWriter = new ArrowBatchStreamWriter(schema, out, timeZoneId) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 73484a212c16..9f99bf501156 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -19,10 +19,10 @@ package org.apache.spark.sql.execution import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} -import java.time.{Instant, LocalDate} +import java.time.{Instant, LocalDate, ZoneOffset} import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, LegacyDateFormats, TimestampFormatter} import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand} import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec} import org.apache.spark.sql.internal.SQLConf @@ -72,9 +72,24 @@ object HiveResult { } } - private def zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) - private def dateFormatter = DateFormatter(zoneId) - private def timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) + // We can create the date formatter only once because it does not depend on Spark's + // session time zone controlled by the SQL config `spark.sql.session.timeZone`. + // The `zoneId` parameter is used only in parsing of special date values like `now`, + // `yesterday` and etc. but not in date formatting. While formatting of: + // - `java.time.LocalDate`, zone id is not used by `DateTimeFormatter` at all. + // - `java.sql.Date`, the date formatter delegates formatting to the legacy formatter + // which uses the default system time zone `TimeZone.getDefault`. This works correctly + // due to `DateTimeUtils.toJavaDate` which is based on the system time zone too. + private val dateFormatter = DateFormatter( + format = DateFormatter.defaultPattern, + // We can set any time zone id. UTC was taken for simplicity. + zoneId = ZoneOffset.UTC, + locale = DateFormatter.defaultLocale, + // Use `FastDateFormat` as the legacy formatter because it is thread-safe. + legacyFormat = LegacyDateFormats.FAST_DATE_FORMAT, + isParsing = false) + private def timestampFormatter = TimestampFormatter.getFractionFormatter( + DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)) /** Formats a datum (based on the given data type) and returns the string representation. 
*/ def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 292ac6db04ba..f7e225b0cdc9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -134,7 +134,7 @@ object PartitioningUtils { val timestampFormatter = TimestampFormatter( timestampPartitionPattern, zoneId, - needVarLengthSecondFraction = true) + isParsing = true) // First, we need to parse every partition's path and see if we can find partition values. val (partitionValues, optDiscoveredBasePaths) = paths.map { path => parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes, diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-invalid.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-invalid.sql new file mode 100644 index 000000000000..9072aa107f25 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-invalid.sql @@ -0,0 +1,53 @@ +--- TESTS FOR DATETIME FORMATTING FUNCTIONS WITH INVALID PATTERNS --- + +-- separating this from datetime-formatting.sql, because the text form +-- for patterns with 5 letters in SimpleDateFormat varies from different JDKs +select date_format('2018-11-17 13:33:33.333', 'GGGGG'); +-- pattern letter count can not be greater than 10 +select date_format('2018-11-17 13:33:33.333', 'yyyyyyyyyyy'); +-- q/L in JDK 8 will fail when the count is more than 2 +select date_format('2018-11-17 13:33:33.333', 'qqqqq'); +select date_format('2018-11-17 13:33:33.333', 'QQQQQ'); +select date_format('2018-11-17 13:33:33.333', 'MMMMM'); +select date_format('2018-11-17 13:33:33.333', 'LLLLL'); + +select date_format('2018-11-17 13:33:33.333', 'EEEEE'); +select date_format('2018-11-17 13:33:33.333', 'FF'); +select date_format('2018-11-17 13:33:33.333', 'ddd'); +-- DD is invalid if the day-of-year exceeds 100, but it becomes valid in Java 11 +-- select date_format('2018-11-17 13:33:33.333', 'DD'); +select date_format('2018-11-17 13:33:33.333', 'DDDD'); +select date_format('2018-11-17 13:33:33.333', 'HHH'); +select date_format('2018-11-17 13:33:33.333', 'hhh'); +select date_format('2018-11-17 13:33:33.333', 'kkk'); +select date_format('2018-11-17 13:33:33.333', 'KKK'); +select date_format('2018-11-17 13:33:33.333', 'mmm'); +select date_format('2018-11-17 13:33:33.333', 'sss'); +select date_format('2018-11-17 13:33:33.333', 'SSSSSSSSSS'); +select date_format('2018-11-17 13:33:33.333', 'aa'); +select date_format('2018-11-17 13:33:33.333', 'V'); +select date_format('2018-11-17 13:33:33.333', 'zzzzz'); +select date_format('2018-11-17 13:33:33.333', 'XXXXXX'); +select date_format('2018-11-17 13:33:33.333', 'ZZZZZZ'); +select date_format('2018-11-17 13:33:33.333', 'OO'); +select date_format('2018-11-17 13:33:33.333', 'xxxxxx'); + +select date_format('2018-11-17 13:33:33.333', 'A'); +select date_format('2018-11-17 13:33:33.333', 'n'); +select date_format('2018-11-17 13:33:33.333', 'N'); +select date_format('2018-11-17 13:33:33.333', 'p'); + +-- disabled week-based patterns +select date_format('2018-11-17 13:33:33.333', 'Y'); +select date_format('2018-11-17 13:33:33.333', 'w'); +select date_format('2018-11-17 13:33:33.333', 'W'); +select date_format('2018-11-17 13:33:33.333', 
'u'); +select date_format('2018-11-17 13:33:33.333', 'e'); +select date_format('2018-11-17 13:33:33.333', 'c'); + +-- others +select date_format('2018-11-17 13:33:33.333', 'B'); +select date_format('2018-11-17 13:33:33.333', 'C'); +select date_format('2018-11-17 13:33:33.333', 'I'); + + diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-legacy.sql new file mode 100644 index 000000000000..19cab61a7ee5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-legacy.sql @@ -0,0 +1,2 @@ +--SET spark.sql.legacy.timeParserPolicy=LEGACY +--IMPORT datetime-formatting.sql \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql new file mode 100644 index 000000000000..3b23a7785f6c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql @@ -0,0 +1,68 @@ +--- TESTS FOR DATETIME FORMATTING FUNCTIONS --- + +create temporary view v as select col from values + (timestamp '1582-06-01 11:33:33.123UTC+080000'), + (timestamp '1970-01-01 00:00:00.000Europe/Paris'), + (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'), + (timestamp '1996-04-01 00:33:33.123Australia/Darwin'), + (timestamp '2018-11-17 13:33:33.123Z'), + (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'), + (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col); + +select col, date_format(col, 'G GG GGG GGGG') from v; + +select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy yyyyyyyy yyyyyyyyy yyyyyyyyyy') from v; + +select col, date_format(col, 'q qq') from v; + +select col, date_format(col, 'Q QQ QQQ QQQQ') from v; + +select col, date_format(col, 'M MM MMM MMMM') from v; + +select col, date_format(col, 'L LL') from v; + +select col, date_format(col, 'E EE EEE EEEE') from v; + +select col, date_format(col, 'F') from v; + +select col, date_format(col, 'd dd') from v; + +select col, date_format(col, 'DD') from v where col = timestamp '2100-01-01 01:33:33.123America/Los_Angeles'; +select col, date_format(col, 'D DDD') from v; + +select col, date_format(col, 'H HH') from v; + +select col, date_format(col, 'h hh') from v; + +select col, date_format(col, 'k kk') from v; + +select col, date_format(col, 'K KK') from v; + +select col, date_format(col, 'm mm') from v; + +select col, date_format(col, 's ss') from v; + +select col, date_format(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS') from v; + +select col, date_format(col, 'a') from v; + +select col, date_format(col, 'VV') from v; + +select col, date_format(col, 'z zz zzz zzzz') from v; + +select col, date_format(col, 'X XX XXX') from v; +select col, date_format(col, 'XXXX XXXXX') from v; + +select col, date_format(col, 'Z ZZ ZZZ ZZZZ ZZZZZ') from v; + +select col, date_format(col, 'O OOOO') from v; + +select col, date_format(col, 'x xx xxx xxxx xxxx xxxxx') from v; + +-- optional pattern, but the results won't be optional for formatting +select col, date_format(col, '[yyyy-MM-dd HH:mm:ss]') from v; + +-- literals +select col, date_format(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'") from v; +select col, date_format(col, "''") from v; +select col, date_format(col, '') from v; diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 5636e0b67036..a63bb8526da4 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -124,10 +124,6 @@ select to_timestamp("12.1234019-10-06S10:11", "ss.SSSSy-MM-dd'S'HH:mm"); select to_timestamp("2019-10-06S", "yyyy-MM-dd'S'"); select to_timestamp("S2019-10-06", "'S'yyyy-MM-dd"); -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee'); -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uucc'); -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuuu'); - select to_timestamp("2019-10-06T10:11:12'12", "yyyy-MM-dd'T'HH:mm:ss''SSSS"); -- middle select to_timestamp("2019-10-06T10:11:12'", "yyyy-MM-dd'T'HH:mm:ss''"); -- tail select to_timestamp("'2019-10-06T10:11:12", "''yyyy-MM-dd'T'HH:mm:ss"); -- head @@ -164,7 +160,3 @@ select from_csv('26/October/2015', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy select from_unixtime(1, 'yyyyyyyyyyy-MM-dd'); select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss'); select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd'); - --- SPARK-31879: the first day of week -select date_format('2020-01-01', 'YYYY-MM-dd uu'); -select date_format('2020-01-01', 'YYYY-MM-dd uuuu'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 3803460f3f08..a4e6e79b4573 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 121 +-- Number of queries: 116 -- !query @@ -734,32 +734,6 @@ struct 2019-10-06 00:00:00 --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee') --- !query schema -struct<> --- !query output -java.lang.IllegalArgumentException -Illegal pattern character: e - - --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uucc') --- !query schema -struct<> --- !query output -java.lang.IllegalArgumentException -Illegal pattern character: c - - --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuuu') --- !query schema -struct --- !query output -2019-10-06 Sunday - - -- !query select to_timestamp("2019-10-06T10:11:12'12", "yyyy-MM-dd'T'HH:mm:ss''SSSS") -- !query schema @@ -1025,19 +999,3 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - - --- !query -select date_format('2020-01-01', 'YYYY-MM-dd uu') --- !query schema -struct --- !query output -2020-01-01 03 - - --- !query -select date_format('2020-01-01', 'YYYY-MM-dd uuuu') --- !query schema -struct --- !query output -2020-01-01 Wednesday diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out new file mode 100644 index 000000000000..248157efacde --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out @@ -0,0 +1,335 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 37 + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'GGGGG') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'GGGGG' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'yyyyyyyyyyy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'qqqqq') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Too many pattern letters: q + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'QQQQQ') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Too many pattern letters: Q + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'MMMMM') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'MMMMM' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'LLLLL') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'LLLLL' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'EEEEE') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'EEEEE' pattern in the DateTimeFormatter. 
1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'FF') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'FF' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'ddd') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'ddd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'DDDD') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'DDDD' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'HHH') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'HHH' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'hhh') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'hhh' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'kkk') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'kkk' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'KKK') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'KKK' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'mmm') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'mmm' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'sss') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'sss' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'SSSSSSSSSS') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'SSSSSSSSSS' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'aa') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'V') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Pattern letter count must be 2: V + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'zzzzz') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'zzzzz' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'XXXXXX') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Too many pattern letters: X + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'ZZZZZZ') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'ZZZZZZ' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'OO') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Pattern letter count must be 1 or 4: O + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'xxxxxx') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Too many pattern letters: x + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'A') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character: A + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'n') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character: n + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'N') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character: N + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'p') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character: p + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'Y') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'Y' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'w') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'w' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'W') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'W' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'u') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'u' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'e') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +All week-based patterns are unsupported since Spark 3.0, detected: e, Please use the SQL function EXTRACT instead + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'c') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +All week-based patterns are unsupported since Spark 3.0, detected: c, Please use the SQL function EXTRACT instead + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'B') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Unknown pattern letter: B + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'C') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Unknown pattern letter: C + + +-- !query +select date_format('2018-11-17 13:33:33.333', 'I') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Unknown pattern letter: I diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out new file mode 100644 index 000000000000..b7bc448a952a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out @@ -0,0 +1,401 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 31 + + +-- !query +create temporary view v as select col from values + (timestamp '1582-06-01 11:33:33.123UTC+080000'), + (timestamp '1970-01-01 00:00:00.000Europe/Paris'), + (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'), + (timestamp '1996-04-01 00:33:33.123Australia/Darwin'), + (timestamp '2018-11-17 13:33:33.123Z'), + (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'), + (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col) +-- !query schema +struct<> +-- !query output + + + +-- !query +select col, date_format(col, 'G GG GGG GGGG') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 AD AD AD AD +1969-12-31 15:00:00 AD AD AD AD +1970-12-31 04:59:59.999 AD AD AD AD +1996-03-31 07:03:33.123 AD AD AD AD +2018-11-17 05:33:33.123 AD AD AD AD +2019-12-31 09:33:33.123 AD AD AD AD +2100-01-01 01:33:33.123 AD AD AD AD + + +-- !query +select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy yyyyyyyy yyyyyyyyy yyyyyyyyyy') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 1582 82 1582 1582 01582 001582 0001582 00001582 000001582 0000001582 +1969-12-31 15:00:00 1969 69 1969 1969 01969 001969 0001969 00001969 000001969 0000001969 +1970-12-31 04:59:59.999 1970 70 1970 1970 01970 001970 0001970 00001970 000001970 0000001970 +1996-03-31 07:03:33.123 1996 96 1996 1996 01996 001996 0001996 00001996 000001996 0000001996 +2018-11-17 05:33:33.123 2018 18 2018 
2018 02018 002018 0002018 00002018 000002018 0000002018 +2019-12-31 09:33:33.123 2019 19 2019 2019 02019 002019 0002019 00002019 000002019 0000002019 +2100-01-01 01:33:33.123 2100 00 2100 2100 02100 002100 0002100 00002100 000002100 0000002100 + + +-- !query +select col, date_format(col, 'q qq') from v +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character 'q' + + +-- !query +select col, date_format(col, 'Q QQ QQQ QQQQ') from v +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character 'Q' + + +-- !query +select col, date_format(col, 'M MM MMM MMMM') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 5 05 May May +1969-12-31 15:00:00 12 12 Dec December +1970-12-31 04:59:59.999 12 12 Dec December +1996-03-31 07:03:33.123 3 03 Mar March +2018-11-17 05:33:33.123 11 11 Nov November +2019-12-31 09:33:33.123 12 12 Dec December +2100-01-01 01:33:33.123 1 01 Jan January + + +-- !query +select col, date_format(col, 'L LL') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 5 05 +1969-12-31 15:00:00 12 12 +1970-12-31 04:59:59.999 12 12 +1996-03-31 07:03:33.123 3 03 +2018-11-17 05:33:33.123 11 11 +2019-12-31 09:33:33.123 12 12 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'E EE EEE EEEE') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 Thu Thu Thu Thursday +1969-12-31 15:00:00 Wed Wed Wed Wednesday +1970-12-31 04:59:59.999 Thu Thu Thu Thursday +1996-03-31 07:03:33.123 Sun Sun Sun Sunday +2018-11-17 05:33:33.123 Sat Sat Sat Saturday +2019-12-31 09:33:33.123 Tue Tue Tue Tuesday +2100-01-01 01:33:33.123 Fri Fri Fri Friday + + +-- !query +select col, date_format(col, 'F') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 5 +1969-12-31 15:00:00 5 +1970-12-31 04:59:59.999 5 +1996-03-31 07:03:33.123 5 +2018-11-17 05:33:33.123 3 +2019-12-31 09:33:33.123 5 +2100-01-01 01:33:33.123 1 + + +-- !query +select col, date_format(col, 'd dd') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 31 31 +1969-12-31 15:00:00 31 31 +1970-12-31 04:59:59.999 31 31 +1996-03-31 07:03:33.123 31 31 +2018-11-17 05:33:33.123 17 17 +2019-12-31 09:33:33.123 31 31 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'DD') from v where col = timestamp '2100-01-01 01:33:33.123America/Los_Angeles' +-- !query schema +struct +-- !query output +2100-01-01 01:33:33.123 01 + + +-- !query +select col, date_format(col, 'D DDD') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 151 151 +1969-12-31 15:00:00 365 365 +1970-12-31 04:59:59.999 365 365 +1996-03-31 07:03:33.123 91 091 +2018-11-17 05:33:33.123 321 321 +2019-12-31 09:33:33.123 365 365 +2100-01-01 01:33:33.123 1 001 + + +-- !query +select col, date_format(col, 'H HH') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 19 19 +1969-12-31 15:00:00 15 15 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'h hh') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 7 07 +1969-12-31 15:00:00 3 03 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'k kk') from v 
+-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 19 19 +1969-12-31 15:00:00 15 15 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'K KK') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 7 07 +1969-12-31 15:00:00 3 03 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'm mm') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 40 40 +1969-12-31 15:00:00 0 00 +1970-12-31 04:59:59.999 59 59 +1996-03-31 07:03:33.123 3 03 +2018-11-17 05:33:33.123 33 33 +2019-12-31 09:33:33.123 33 33 +2100-01-01 01:33:33.123 33 33 + + +-- !query +select col, date_format(col, 's ss') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 35 35 +1969-12-31 15:00:00 0 00 +1970-12-31 04:59:59.999 59 59 +1996-03-31 07:03:33.123 33 33 +2018-11-17 05:33:33.123 33 33 +2019-12-31 09:33:33.123 33 33 +2100-01-01 01:33:33.123 33 33 + + +-- !query +select col, date_format(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 123 123 123 0123 00123 000123 0000123 00000123 000000123 +1969-12-31 15:00:00 0 00 000 0000 00000 000000 0000000 00000000 000000000 +1970-12-31 04:59:59.999 999 999 999 0999 00999 000999 0000999 00000999 000000999 +1996-03-31 07:03:33.123 123 123 123 0123 00123 000123 0000123 00000123 000000123 +2018-11-17 05:33:33.123 123 123 123 0123 00123 000123 0000123 00000123 000000123 +2019-12-31 09:33:33.123 123 123 123 0123 00123 000123 0000123 00000123 000000123 +2100-01-01 01:33:33.123 123 123 123 0123 00123 000123 0000123 00000123 000000123 + + +-- !query +select col, date_format(col, 'a') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 PM +1969-12-31 15:00:00 PM +1970-12-31 04:59:59.999 AM +1996-03-31 07:03:33.123 AM +2018-11-17 05:33:33.123 AM +2019-12-31 09:33:33.123 AM +2100-01-01 01:33:33.123 AM + + +-- !query +select col, date_format(col, 'VV') from v +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character 'V' + + +-- !query +select col, date_format(col, 'z zz zzz zzzz') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 PST PST PST Pacific Standard Time +1969-12-31 15:00:00 PST PST PST Pacific Standard Time +1970-12-31 04:59:59.999 PST PST PST Pacific Standard Time +1996-03-31 07:03:33.123 PST PST PST Pacific Standard Time +2018-11-17 05:33:33.123 PST PST PST Pacific Standard Time +2019-12-31 09:33:33.123 PST PST PST Pacific Standard Time +2100-01-01 01:33:33.123 PST PST PST Pacific Standard Time + + +-- !query +select col, date_format(col, 'X XX XXX') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 -08 -0800 -08:00 +1969-12-31 15:00:00 -08 -0800 -08:00 +1970-12-31 04:59:59.999 -08 -0800 -08:00 +1996-03-31 07:03:33.123 -08 -0800 -08:00 +2018-11-17 05:33:33.123 -08 -0800 -08:00 +2019-12-31 09:33:33.123 -08 -0800 -08:00 +2100-01-01 01:33:33.123 -08 -0800 -08:00 + + +-- !query +select col, date_format(col, 'XXXX XXXXX') from v +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +invalid ISO 8601 format: length=4 + + +-- !query +select col, date_format(col, 'Z ZZ ZZZ ZZZZ ZZZZZ') from v 
+-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 -0800 -0800 -0800 -0800 -0800 +1969-12-31 15:00:00 -0800 -0800 -0800 -0800 -0800 +1970-12-31 04:59:59.999 -0800 -0800 -0800 -0800 -0800 +1996-03-31 07:03:33.123 -0800 -0800 -0800 -0800 -0800 +2018-11-17 05:33:33.123 -0800 -0800 -0800 -0800 -0800 +2019-12-31 09:33:33.123 -0800 -0800 -0800 -0800 -0800 +2100-01-01 01:33:33.123 -0800 -0800 -0800 -0800 -0800 + + +-- !query +select col, date_format(col, 'O OOOO') from v +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character 'O' + + +-- !query +select col, date_format(col, 'x xx xxx xxxx xxxx xxxxx') from v +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal pattern character 'x' + + +-- !query +select col, date_format(col, '[yyyy-MM-dd HH:mm:ss]') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 [1582-05-31 19:40:35] +1969-12-31 15:00:00 [1969-12-31 15:00:00] +1970-12-31 04:59:59.999 [1970-12-31 04:59:59] +1996-03-31 07:03:33.123 [1996-03-31 07:03:33] +2018-11-17 05:33:33.123 [2018-11-17 05:33:33] +2019-12-31 09:33:33.123 [2019-12-31 09:33:33] +2100-01-01 01:33:33.123 [2100-01-01 01:33:33] + + +-- !query +select col, date_format(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'") from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +1969-12-31 15:00:00 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +1970-12-31 04:59:59.999 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +1996-03-31 07:03:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +2018-11-17 05:33:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +2019-12-31 09:33:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +2100-01-01 01:33:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV + + +-- !query +select col, date_format(col, "''") from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 ' +1969-12-31 15:00:00 ' +1970-12-31 04:59:59.999 ' +1996-03-31 07:03:33.123 ' +2018-11-17 05:33:33.123 ' +2019-12-31 09:33:33.123 ' +2100-01-01 01:33:33.123 ' + + +-- !query +select col, date_format(col, '') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 +1969-12-31 15:00:00 +1970-12-31 04:59:59.999 +1996-03-31 07:03:33.123 +2018-11-17 05:33:33.123 +2019-12-31 09:33:33.123 +2100-01-01 01:33:33.123 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out new file mode 100644 index 000000000000..f724658d354d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out @@ -0,0 +1,431 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 31 + + +-- !query +create temporary view v as select col from values + (timestamp '1582-06-01 11:33:33.123UTC+080000'), + (timestamp '1970-01-01 00:00:00.000Europe/Paris'), + (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'), + (timestamp '1996-04-01 00:33:33.123Australia/Darwin'), + (timestamp '2018-11-17 13:33:33.123Z'), + (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'), + (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col) +-- !query schema +struct<> +-- !query output + + + +-- !query +select col, date_format(col, 'G GG GGG GGGG') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 AD AD AD Anno Domini +1969-12-31 15:00:00 AD AD AD Anno Domini +1970-12-31 04:59:59.999 AD AD AD Anno Domini +1996-03-31 07:03:33.123 AD AD AD Anno Domini +2018-11-17 05:33:33.123 AD AD AD 
Anno Domini +2019-12-31 09:33:33.123 AD AD AD Anno Domini +2100-01-01 01:33:33.123 AD AD AD Anno Domini + + +-- !query +select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy yyyyyyyy yyyyyyyyy yyyyyyyyyy') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 1582 82 1582 1582 01582 001582 0001582 00001582 000001582 0000001582 +1969-12-31 15:00:00 1969 69 1969 1969 01969 001969 0001969 00001969 000001969 0000001969 +1970-12-31 04:59:59.999 1970 70 1970 1970 01970 001970 0001970 00001970 000001970 0000001970 +1996-03-31 07:03:33.123 1996 96 1996 1996 01996 001996 0001996 00001996 000001996 0000001996 +2018-11-17 05:33:33.123 2018 18 2018 2018 02018 002018 0002018 00002018 000002018 0000002018 +2019-12-31 09:33:33.123 2019 19 2019 2019 02019 002019 0002019 00002019 000002019 0000002019 +2100-01-01 01:33:33.123 2100 00 2100 2100 02100 002100 0002100 00002100 000002100 0000002100 + + +-- !query +select col, date_format(col, 'q qq') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 2 02 +1969-12-31 15:00:00 4 04 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 1 01 +2018-11-17 05:33:33.123 4 04 +2019-12-31 09:33:33.123 4 04 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'Q QQ QQQ QQQQ') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 2 02 Q2 2nd quarter +1969-12-31 15:00:00 4 04 Q4 4th quarter +1970-12-31 04:59:59.999 4 04 Q4 4th quarter +1996-03-31 07:03:33.123 1 01 Q1 1st quarter +2018-11-17 05:33:33.123 4 04 Q4 4th quarter +2019-12-31 09:33:33.123 4 04 Q4 4th quarter +2100-01-01 01:33:33.123 1 01 Q1 1st quarter + + +-- !query +select col, date_format(col, 'M MM MMM MMMM') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 5 05 May May +1969-12-31 15:00:00 12 12 Dec December +1970-12-31 04:59:59.999 12 12 Dec December +1996-03-31 07:03:33.123 3 03 Mar March +2018-11-17 05:33:33.123 11 11 Nov November +2019-12-31 09:33:33.123 12 12 Dec December +2100-01-01 01:33:33.123 1 01 Jan January + + +-- !query +select col, date_format(col, 'L LL') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 5 05 +1969-12-31 15:00:00 12 12 +1970-12-31 04:59:59.999 12 12 +1996-03-31 07:03:33.123 3 03 +2018-11-17 05:33:33.123 11 11 +2019-12-31 09:33:33.123 12 12 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'E EE EEE EEEE') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 Mon Mon Mon Monday +1969-12-31 15:00:00 Wed Wed Wed Wednesday +1970-12-31 04:59:59.999 Thu Thu Thu Thursday +1996-03-31 07:03:33.123 Sun Sun Sun Sunday +2018-11-17 05:33:33.123 Sat Sat Sat Saturday +2019-12-31 09:33:33.123 Tue Tue Tue Tuesday +2100-01-01 01:33:33.123 Fri Fri Fri Friday + + +-- !query +select col, date_format(col, 'F') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 3 +1969-12-31 15:00:00 3 +1970-12-31 04:59:59.999 3 +1996-03-31 07:03:33.123 3 +2018-11-17 05:33:33.123 3 +2019-12-31 09:33:33.123 3 +2100-01-01 01:33:33.123 1 + + +-- !query +select col, date_format(col, 'd dd') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 31 31 +1969-12-31 15:00:00 31 31 +1970-12-31 04:59:59.999 31 31 +1996-03-31 07:03:33.123 31 31 +2018-11-17 05:33:33.123 17 17 +2019-12-31 09:33:33.123 31 31 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'DD') from v where col = timestamp '2100-01-01 01:33:33.123America/Los_Angeles' +-- !query schema +struct 
+-- !query output +2100-01-01 01:33:33.123 01 + + +-- !query +select col, date_format(col, 'D DDD') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 151 151 +1969-12-31 15:00:00 365 365 +1970-12-31 04:59:59.999 365 365 +1996-03-31 07:03:33.123 91 091 +2018-11-17 05:33:33.123 321 321 +2019-12-31 09:33:33.123 365 365 +2100-01-01 01:33:33.123 1 001 + + +-- !query +select col, date_format(col, 'H HH') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 19 19 +1969-12-31 15:00:00 15 15 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'h hh') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 7 07 +1969-12-31 15:00:00 3 03 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'k kk') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 19 19 +1969-12-31 15:00:00 15 15 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'K KK') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 7 07 +1969-12-31 15:00:00 3 03 +1970-12-31 04:59:59.999 4 04 +1996-03-31 07:03:33.123 7 07 +2018-11-17 05:33:33.123 5 05 +2019-12-31 09:33:33.123 9 09 +2100-01-01 01:33:33.123 1 01 + + +-- !query +select col, date_format(col, 'm mm') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 40 40 +1969-12-31 15:00:00 0 00 +1970-12-31 04:59:59.999 59 59 +1996-03-31 07:03:33.123 3 03 +2018-11-17 05:33:33.123 33 33 +2019-12-31 09:33:33.123 33 33 +2100-01-01 01:33:33.123 33 33 + + +-- !query +select col, date_format(col, 's ss') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 35 35 +1969-12-31 15:00:00 0 00 +1970-12-31 04:59:59.999 59 59 +1996-03-31 07:03:33.123 33 33 +2018-11-17 05:33:33.123 33 33 +2019-12-31 09:33:33.123 33 33 +2100-01-01 01:33:33.123 33 33 + + +-- !query +select col, date_format(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 1 12 123 1230 12300 123000 1230000 12300000 123000000 +1969-12-31 15:00:00 0 00 000 0000 00000 000000 0000000 00000000 000000000 +1970-12-31 04:59:59.999 9 99 999 9990 99900 999000 9990000 99900000 999000000 +1996-03-31 07:03:33.123 1 12 123 1230 12300 123000 1230000 12300000 123000000 +2018-11-17 05:33:33.123 1 12 123 1230 12300 123000 1230000 12300000 123000000 +2019-12-31 09:33:33.123 1 12 123 1230 12300 123000 1230000 12300000 123000000 +2100-01-01 01:33:33.123 1 12 123 1230 12300 123000 1230000 12300000 123000000 + + +-- !query +select col, date_format(col, 'a') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 PM +1969-12-31 15:00:00 PM +1970-12-31 04:59:59.999 AM +1996-03-31 07:03:33.123 AM +2018-11-17 05:33:33.123 AM +2019-12-31 09:33:33.123 AM +2100-01-01 01:33:33.123 AM + + +-- !query +select col, date_format(col, 'VV') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 America/Los_Angeles +1969-12-31 15:00:00 America/Los_Angeles +1970-12-31 04:59:59.999 America/Los_Angeles +1996-03-31 07:03:33.123 America/Los_Angeles +2018-11-17 05:33:33.123 
America/Los_Angeles +2019-12-31 09:33:33.123 America/Los_Angeles +2100-01-01 01:33:33.123 America/Los_Angeles + + +-- !query +select col, date_format(col, 'z zz zzz zzzz') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 PST PST PST Pacific Standard Time +1969-12-31 15:00:00 PST PST PST Pacific Standard Time +1970-12-31 04:59:59.999 PST PST PST Pacific Standard Time +1996-03-31 07:03:33.123 PST PST PST Pacific Standard Time +2018-11-17 05:33:33.123 PST PST PST Pacific Standard Time +2019-12-31 09:33:33.123 PST PST PST Pacific Standard Time +2100-01-01 01:33:33.123 PST PST PST Pacific Standard Time + + +-- !query +select col, date_format(col, 'X XX XXX') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 -0752 -0752 -07:52 +1969-12-31 15:00:00 -08 -0800 -08:00 +1970-12-31 04:59:59.999 -08 -0800 -08:00 +1996-03-31 07:03:33.123 -08 -0800 -08:00 +2018-11-17 05:33:33.123 -08 -0800 -08:00 +2019-12-31 09:33:33.123 -08 -0800 -08:00 +2100-01-01 01:33:33.123 -08 -0800 -08:00 + + +-- !query +select col, date_format(col, 'XXXX XXXXX') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 -075258 -07:52:58 +1969-12-31 15:00:00 -0800 -08:00 +1970-12-31 04:59:59.999 -0800 -08:00 +1996-03-31 07:03:33.123 -0800 -08:00 +2018-11-17 05:33:33.123 -0800 -08:00 +2019-12-31 09:33:33.123 -0800 -08:00 +2100-01-01 01:33:33.123 -0800 -08:00 + + +-- !query +select col, date_format(col, 'Z ZZ ZZZ ZZZZ ZZZZZ') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 -0752 -0752 -0752 GMT-07:52:58 -07:52:58 +1969-12-31 15:00:00 -0800 -0800 -0800 GMT-08:00 -08:00 +1970-12-31 04:59:59.999 -0800 -0800 -0800 GMT-08:00 -08:00 +1996-03-31 07:03:33.123 -0800 -0800 -0800 GMT-08:00 -08:00 +2018-11-17 05:33:33.123 -0800 -0800 -0800 GMT-08:00 -08:00 +2019-12-31 09:33:33.123 -0800 -0800 -0800 GMT-08:00 -08:00 +2100-01-01 01:33:33.123 -0800 -0800 -0800 GMT-08:00 -08:00 + + +-- !query +select col, date_format(col, 'O OOOO') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 GMT-7:52:58 GMT-07:52:58 +1969-12-31 15:00:00 GMT-8 GMT-08:00 +1970-12-31 04:59:59.999 GMT-8 GMT-08:00 +1996-03-31 07:03:33.123 GMT-8 GMT-08:00 +2018-11-17 05:33:33.123 GMT-8 GMT-08:00 +2019-12-31 09:33:33.123 GMT-8 GMT-08:00 +2100-01-01 01:33:33.123 GMT-8 GMT-08:00 + + +-- !query +select col, date_format(col, 'x xx xxx xxxx xxxx xxxxx') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 -0752 -0752 -07:52 -075258 -075258 -07:52:58 +1969-12-31 15:00:00 -08 -0800 -08:00 -0800 -0800 -08:00 +1970-12-31 04:59:59.999 -08 -0800 -08:00 -0800 -0800 -08:00 +1996-03-31 07:03:33.123 -08 -0800 -08:00 -0800 -0800 -08:00 +2018-11-17 05:33:33.123 -08 -0800 -08:00 -0800 -0800 -08:00 +2019-12-31 09:33:33.123 -08 -0800 -08:00 -0800 -0800 -08:00 +2100-01-01 01:33:33.123 -08 -0800 -08:00 -0800 -0800 -08:00 + + +-- !query +select col, date_format(col, '[yyyy-MM-dd HH:mm:ss]') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 1582-05-31 19:40:35 +1969-12-31 15:00:00 1969-12-31 15:00:00 +1970-12-31 04:59:59.999 1970-12-31 04:59:59 +1996-03-31 07:03:33.123 1996-03-31 07:03:33 +2018-11-17 05:33:33.123 2018-11-17 05:33:33 +2019-12-31 09:33:33.123 2019-12-31 09:33:33 +2100-01-01 01:33:33.123 2100-01-01 01:33:33 + + +-- !query +select col, date_format(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'") from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +1969-12-31 15:00:00 
姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +1970-12-31 04:59:59.999 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +1996-03-31 07:03:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +2018-11-17 05:33:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +2019-12-31 09:33:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV +2100-01-01 01:33:33.123 姚123GyYqQMLwWuEFDdhHmsSaVzZxXOV + + +-- !query +select col, date_format(col, "''") from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 ' +1969-12-31 15:00:00 ' +1970-12-31 04:59:59.999 ' +1996-03-31 07:03:33.123 ' +2018-11-17 05:33:33.123 ' +2019-12-31 09:33:33.123 ' +2100-01-01 01:33:33.123 ' + + +-- !query +select col, date_format(col, '') from v +-- !query schema +struct +-- !query output +1582-05-31 19:40:35.123 +1969-12-31 15:00:00 +1970-12-31 04:59:59.999 +1996-03-31 07:03:33.123 +2018-11-17 05:33:33.123 +2019-12-31 09:33:33.123 +2100-01-01 01:33:33.123 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 99dd14d21e6f..38d078838ebe 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 121 +-- Number of queries: 116 -- !query @@ -706,32 +706,6 @@ struct 2019-10-06 00:00:00 --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee') --- !query schema -struct<> --- !query output -java.lang.IllegalArgumentException -Illegal pattern character 'e' - - --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uucc') --- !query schema -struct<> --- !query output -java.lang.IllegalArgumentException -Illegal pattern character 'c' - - --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuuu') --- !query schema -struct --- !query output -2019-10-06 0007 - - -- !query select to_timestamp("2019-10-06T10:11:12'12", "yyyy-MM-dd'T'HH:mm:ss''SSSS") -- !query schema @@ -980,19 +954,3 @@ select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd') struct -- !query output 00000002018-11-17 - - --- !query -select date_format('2020-01-01', 'YYYY-MM-dd uu') --- !query schema -struct --- !query output -2020-01-01 03 - - --- !query -select date_format('2020-01-01', 'YYYY-MM-dd uuuu') --- !query schema -struct --- !query output -2020-01-01 0003 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index c8c568c736d7..dc4220ff6226 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 121 +-- Number of queries: 116 -- !query @@ -706,32 +706,6 @@ struct 2019-10-06 00:00:00 --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee') --- !query schema -struct<> --- !query output -java.lang.IllegalArgumentException -Illegal pattern character: e - - --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uucc') --- !query schema -struct<> --- !query output -java.lang.IllegalArgumentException -Illegal pattern character: c - - --- !query -select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuuu') --- !query schema -struct --- !query output -2019-10-06 Sunday - - -- !query select to_timestamp("2019-10-06T10:11:12'12", "yyyy-MM-dd'T'HH:mm:ss''SSSS") -- !query schema @@ -997,19 +971,3 @@ struct<> -- 
!query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - - --- !query -select date_format('2020-01-01', 'YYYY-MM-dd uu') --- !query schema -struct --- !query output -2020-01-01 03 - - --- !query -select date_format('2020-01-01', 'YYYY-MM-dd uuuu') --- !query schema -struct --- !query output -2020-01-01 Wednesday diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index 5e81c74420fd..a0b212d2cf6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -17,21 +17,27 @@ package org.apache.spark.sql.execution +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.connector.InMemoryTableCatalog +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} class HiveResultSuite extends SharedSparkSession { import testImplicits._ test("date formatting in hive result") { - val dates = Seq("2018-12-28", "1582-10-03", "1582-10-04", "1582-10-15") - val df = dates.toDF("a").selectExpr("cast(a as date) as b") - val executedPlan1 = df.queryExecution.executedPlan - val result = HiveResult.hiveResultString(executedPlan1) - assert(result == dates) - val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan - val result2 = HiveResult.hiveResultString(executedPlan2) - assert(result2 == dates.map(x => s"[$x]")) + DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> zoneId) { + val dates = Seq("2018-12-28", "1582-10-03", "1582-10-04", "1582-10-15") + val df = dates.toDF("a").selectExpr("cast(a as date) as b") + val executedPlan1 = df.queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan1) + assert(result == dates) + val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan + val result2 = HiveResult.hiveResultString(executedPlan2) + assert(result2 == dates.map(x => s"[$x]")) + } + } } test("timestamp formatting in hive result") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index d3301ced2ba1..32a9558e91f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -60,7 +60,7 @@ abstract class ParquetPartitionDiscoverySuite val timeZoneId = ZoneId.systemDefault() val df = DateFormatter(timeZoneId) val tf = TimestampFormatter( - timestampPartitionPattern, timeZoneId, needVarLengthSecondFraction = true) + timestampPartitionPattern, timeZoneId, isParsing = true) protected override def beforeAll(): Unit = { super.beforeAll() diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 21256ad02c13..4e6d4e104021 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -813,31 +813,27 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } test("SPARK-31859 Thriftserver works with spark.sql.datetime.java8API.enabled=true") { - withJdbcStatement() { statement => - withJdbcStatement() { st => - st.execute("set spark.sql.datetime.java8API.enabled=true") - val rs = st.executeQuery("select date '2020-05-28', timestamp '2020-05-28 00:00:00'") - rs.next() - assert(rs.getDate(1).toString() == "2020-05-28") - assert(rs.getTimestamp(2).toString() == "2020-05-28 00:00:00.0") - } + withJdbcStatement() { st => + st.execute("set spark.sql.datetime.java8API.enabled=true") + val rs = st.executeQuery("select date '2020-05-28', timestamp '2020-05-28 00:00:00'") + rs.next() + assert(rs.getDate(1).toString() == "2020-05-28") + assert(rs.getTimestamp(2).toString() == "2020-05-28 00:00:00.0") } } test("SPARK-31861 Thriftserver respects spark.sql.session.timeZone") { - withJdbcStatement() { statement => - withJdbcStatement() { st => - st.execute("set spark.sql.session.timeZone=+03:15") // different than Thriftserver's JVM tz + withJdbcStatement() { st => + st.execute("set spark.sql.session.timeZone=+03:15") // different than Thriftserver's JVM tz val rs = st.executeQuery("select timestamp '2020-05-28 10:00:00'") - rs.next() - // The timestamp as string is the same as the literal - assert(rs.getString(1) == "2020-05-28 10:00:00.0") - // Parsing it to java.sql.Timestamp in the client will always result in a timestamp - // in client default JVM timezone. The string value of the Timestamp will match the literal, - // but if the JDBC application cares about the internal timezone and UTC offset of the - // Timestamp object, it should set spark.sql.session.timeZone to match its client JVM tz. - assert(rs.getTimestamp(1).toString() == "2020-05-28 10:00:00.0") - } + rs.next() + // The timestamp as string is the same as the literal + assert(rs.getString(1) == "2020-05-28 10:00:00.0") + // Parsing it to java.sql.Timestamp in the client will always result in a timestamp + // in client default JVM timezone. The string value of the Timestamp will match the literal, + // but if the JDBC application cares about the internal timezone and UTC offset of the + // Timestamp object, it should set spark.sql.session.timeZone to match its client JVM tz. + assert(rs.getTimestamp(1).toString() == "2020-05-28 10:00:00.0") } }
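A side note on the HiveResult.scala change earlier in this patch: the date formatter can be built once with a fixed UTC zone because `java.time.LocalDate` carries no instant, so a `DateTimeFormatter`'s override zone never alters the printed date fields. The following is a minimal, self-contained Scala sketch (not part of the patch; the object name is illustrative) showing that the override zone has no effect on date-only formatting:

    import java.time.{LocalDate, ZoneId, ZoneOffset}
    import java.time.format.DateTimeFormatter
    import java.util.Locale

    object LocalDateFormattingDemo {
      def main(args: Array[String]): Unit = {
        val date = LocalDate.of(2018, 12, 28)
        val pattern = "yyyy-MM-dd"
        // A LocalDate has no instant, so the override zone set via withZone is
        // never used to convert the date; both formatters print the same string.
        val withUtc = DateTimeFormatter.ofPattern(pattern, Locale.US).withZone(ZoneOffset.UTC)
        val withShanghai = DateTimeFormatter.ofPattern(pattern, Locale.US).withZone(ZoneId.of("Asia/Shanghai"))
        assert(withUtc.format(date) == withShanghai.format(date))
        println(withUtc.format(date)) // 2018-12-28
      }
    }

The timestamp formatter, by contrast, remains a `def` that reads `spark.sql.session.timeZone` on each access, because timestamps do carry an instant and their rendering depends on the session time zone.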