Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
fb10b91
Adding DateTimeFormatter
MaxGekk Dec 1, 2018
a9b39ec
Support DateTimeFormatter by JacksonParser and JacksonGenerator
MaxGekk Dec 1, 2018
ff589f5
Make test independent from current time zone
MaxGekk Dec 1, 2018
4646ded
Fix a test by new fallback
MaxGekk Dec 1, 2018
1c838e0
Set time zone explicitly
MaxGekk Dec 1, 2018
142f301
Updating the migration guide
MaxGekk Dec 1, 2018
606da21
Fix the migration guide by replacing CSV by JSON
MaxGekk Dec 1, 2018
f326042
Inlining method's arguments
MaxGekk Dec 1, 2018
4120228
A test for roundtrip timestamp parsing
MaxGekk Dec 2, 2018
6689747
Merge remote-tracking branch 'origin/master' into json-time-parser
MaxGekk Dec 2, 2018
e575162
Set time zone to GMT to eliminate of situation when time zone offset …
MaxGekk Dec 2, 2018
a35d5bf
UTC -> GMT
MaxGekk Dec 2, 2018
2a2085d
Using floorDiv to take days from seconds
MaxGekk Dec 2, 2018
55f2eac
Removing unnecessary time zone settings
MaxGekk Dec 2, 2018
57600e2
Merge remote-tracking branch 'origin/master' into json-time-parser
MaxGekk Dec 4, 2018
07fcf46
Using legacy parser in HiveCompatibilitySuite
MaxGekk Dec 5, 2018
6b6ea8a
Enable new parser in HiveCompatibilitySuit
MaxGekk Dec 7, 2018
244654b
Remove saving legacy parser settings
MaxGekk Dec 7, 2018
015fdce
Updating migration guide
MaxGekk Dec 8, 2018
96529f5
Making date parser independent from time zones
MaxGekk Dec 12, 2018
07d6031
Test refactoring
MaxGekk Dec 13, 2018
d761dee
protected is added
MaxGekk Dec 13, 2018
24b1e3d
toInstant -> toInstantWithZoneId
MaxGekk Dec 13, 2018
9a11515
Set time zone in the test
MaxGekk Dec 13, 2018
4b01d05
GMT -> UTC
MaxGekk Dec 13, 2018
0c7b96b
DateTimeFormatter -> TimestampFormatter
MaxGekk Dec 13, 2018
bbaff09
timeParser -> timestampParser
MaxGekk Dec 13, 2018
8af9df9
Round trip tests
MaxGekk Dec 14, 2018
363482e
Renaming test suite
MaxGekk Dec 14, 2018
07e0bf8
Added withClue
MaxGekk Dec 14, 2018
c12da1f
Put test under legacy time parser
MaxGekk Dec 14, 2018
60ab5b1
TODO
MaxGekk Dec 15, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Adding DateTimeFormatter
  • Loading branch information
MaxGekk committed Dec 1, 2018
commit fb10b91502b67b98f2904a06b017a6e56dd6e39f
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.util

import java.time._
import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.{ChronoField, TemporalQueries}
import java.util.{Locale, TimeZone}

import scala.util.Try

import org.apache.commons.lang3.time.FastDateFormat

import org.apache.spark.sql.internal.SQLConf

/**
 * Converts timestamps between their textual form and a `Long` count of microseconds
 * since the epoch.
 */
sealed trait DateTimeFormatter {
// Parses the text `s` into a timestamp value.
def parse(s: String): Long // returns microseconds since epoch
// Renders the timestamp `us` (microseconds since epoch) as text.
def format(us: Long): String
}

class Iso8601DateTimeFormatter(
pattern: String,
timeZone: TimeZone,
locale: Locale) extends DateTimeFormatter {
// java.time formatter for `pattern`. Date/time fields that the pattern does not supply
// default to 1970-01-01 00:00:00.
val formatter = {
  val builder = new DateTimeFormatterBuilder().appendPattern(pattern)
  // The builder is mutable and each call appends to it, so the defaults are registered
  // in the listed order.
  val fieldDefaults = Seq(
    ChronoField.YEAR_OF_ERA -> 1970L,
    ChronoField.MONTH_OF_YEAR -> 1L,
    ChronoField.DAY_OF_MONTH -> 1L,
    ChronoField.HOUR_OF_DAY -> 0L,
    ChronoField.MINUTE_OF_HOUR -> 0L,
    ChronoField.SECOND_OF_MINUTE -> 0L)
  fieldDefaults.foreach { case (field, value) => builder.parseDefaulting(field, value) }
  builder.toFormatter(locale)
}

/**
 * Parses `s` with `formatter` and converts the result to an `Instant`.
 *
 * If the parsed text carries no zone offset, the parsed local date-time is interpreted
 * in `timeZone`; otherwise the instant is taken directly from the parsed value.
 */
def toInstant(s: String): Instant = {
val temporalAccessor = formatter.parse(s)
// A null result from the offset query means the input text did not specify a zone offset.
if (temporalAccessor.query(TemporalQueries.offset()) == null) {
// NOTE(review): the non-code lines that follow, up to the `val localDateTime` line, look
// like review-thread text captured together with the page rather than Scala source.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry I'm not very familiar with this API. what does this condition mean?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

zone offset is unknown after parsing. For example, if you parse 13-12-2018 09:55:00, it is unclear in which timezone it is.

val localDateTime = LocalDateTime.from(temporalAccessor)
val zonedDateTime = ZonedDateTime.of(localDateTime, timeZone.toZoneId)
Instant.from(zonedDateTime)
} else {
Instant.from(temporalAccessor)
}
}

/**
 * Converts `instant` to a scaled count: epoch seconds multiplied by `secMul`, plus the
 * nanosecond-of-second part divided by `nanoDiv`.
 *
 * Throws `ArithmeticException` if the multiplication or the addition overflows a `Long`.
 */
private def instantToMicros(instant: Instant, secMul: Long, nanoDiv: Long): Long = {
  val scaledSeconds = Math.multiplyExact(instant.getEpochSecond, secMul)
  val subSecondPart = instant.getNano / nanoDiv
  Math.addExact(scaledSeconds, subSecondPart)
}

/**
 * Parses `s` and returns the timestamp as microseconds since the epoch.
 */
def parse(s: String): Long = {
  val instant = toInstant(s)
  instantToMicros(instant, DateTimeUtils.MICROS_PER_SECOND, DateTimeUtils.NANOS_PER_MICROS)
}

/**
 * Formats `us` (microseconds since the epoch) with `formatter` in the zone of `timeZone`.
 */
def format(us: Long): String = {
  val microsPerSecond = DateTimeUtils.MICROS_PER_SECOND
  // floorDiv/floorMod keep the sub-second remainder non-negative for pre-epoch values.
  val epochSeconds = Math.floorDiv(us, microsPerSecond)
  val microsOfSecond = Math.floorMod(us, microsPerSecond)
  val nanoAdjustment = microsOfSecond * DateTimeUtils.NANOS_PER_MICROS
  val instant = Instant.ofEpochSecond(epochSeconds, nanoAdjustment)

  val zonedFormatter = formatter.withZone(timeZone.toZoneId)
  zonedFormatter.format(instant)
}
}

/**
 * Timestamp formatter backed by commons-lang3 `FastDateFormat`. Parsing goes through
 * milliseconds, which are then scaled to microseconds.
 */
class LegacyDateTimeFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends DateTimeFormatter {
  val format = FastDateFormat.getInstance(pattern, timeZone, locale)

  /** Parses `s` with the configured pattern and returns milliseconds since the epoch. */
  protected def toMillis(s: String): Long = {
    val parsed = format.parse(s)
    parsed.getTime
  }

  /** Parses `s` and returns microseconds since the epoch. */
  def parse(s: String): Long = {
    val millis = toMillis(s)
    millis * DateTimeUtils.MICROS_PER_MILLIS
  }

  /** Formats `us` (microseconds since the epoch) with the configured pattern. */
  def format(us: Long): String = {
    val timestamp = DateTimeUtils.toJavaTimestamp(us)
    format.format(timestamp)
  }
}

/**
 * Legacy timestamp formatter that falls back to `DateTimeUtils.stringToTime` when
 * pattern-based parsing throws a non-fatal exception.
 */
class LegacyFallbackDateTimeFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends LegacyDateTimeFormatter(pattern, timeZone, locale) {
  override def toMillis(s: String): Long = {
    // Evaluated only when the pattern-based attempt fails.
    def fallbackMillis: Long = DateTimeUtils.stringToTime(s).getTime
    Try(super.toMillis(s)).getOrElse(fallbackMillis)
  }
}

/**
 * Factory for [[DateTimeFormatter]] instances; the implementation is chosen by the
 * `legacyTimeParserEnabled` setting of the active `SQLConf`.
 */
object DateTimeFormatter {
  def apply(format: String, timeZone: TimeZone, locale: Locale): DateTimeFormatter = {
    val useLegacyParser = SQLConf.get.legacyTimeParserEnabled
    if (!useLegacyParser) {
      new Iso8601DateTimeFormatter(format, timeZone, locale)
    } else {
      new LegacyFallbackDateTimeFormatter(format, timeZone, locale)
    }
  }
}

/**
 * Converts dates between their textual form and an `Int` count of days since the epoch.
 */
sealed trait DateFormatter {
// Parses the text `s` into a date value.
def parse(s: String): Int // returns days since epoch
// Renders the date `days` (days since epoch) as text.
def format(days: Int): String
}

/**
 * Date formatter built on top of `Iso8601DateTimeFormatter`. Dates are exchanged as the
 * number of days since the epoch.
 */
class Iso8601DateFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends DateFormatter {

  val dateTimeFormatter = new Iso8601DateTimeFormatter(pattern, timeZone, locale)

  /**
   * Parses `s` and returns the number of days since the epoch of the resulting instant.
   */
  override def parse(s: String): Int = {
    val seconds = dateTimeFormatter.toInstant(s).getEpochSecond
    // floorDiv rounds toward negative infinity, so an instant before the epoch that does not
    // fall on a day boundary maps to the day containing it. Plain `/` truncates toward zero
    // and would return the following day for such inputs.
    // NOTE(review): the day is derived from the UTC epoch second while text without an offset
    // is interpreted in `timeZone`; for zones ahead of UTC, local midnight lies in the previous
    // UTC day. Confirm whether callers are expected to pass UTC here.
    Math.floorDiv(seconds, DateTimeUtils.SECONDS_PER_DAY).toInt
  }

  /**
   * Formats `days` (days since the epoch) using the pattern, in the zone of `timeZone`.
   */
  override def format(days: Int): String = {
    // Widen before multiplying so the product is always computed as a Long.
    val instant = Instant.ofEpochSecond(days.toLong * DateTimeUtils.SECONDS_PER_DAY)
    dateTimeFormatter.formatter.withZone(timeZone.toZoneId).format(instant)
  }
}

/**
 * Date formatter backed by commons-lang3 `FastDateFormat`. Conversion between
 * milliseconds / `java.sql.Date` and day counts is delegated to `DateTimeUtils`.
 */
class LegacyDateFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends DateFormatter {
  val format = FastDateFormat.getInstance(pattern, timeZone, locale)

  /** Parses `s` with the configured pattern and returns days since the epoch. */
  def parse(s: String): Int = DateTimeUtils.millisToDays(format.parse(s).getTime)

  /** Formats `days` (days since the epoch) with the configured pattern. */
  def format(days: Int): String = format.format(DateTimeUtils.toJavaDate(days))
}

/**
 * Legacy date formatter that falls back to `DateTimeUtils.stringToTime` when
 * pattern-based parsing throws a non-fatal exception.
 */
class LegacyFallbackDateFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends LegacyDateFormatter(pattern, timeZone, locale) {
  override def parse(s: String): Int = {
    // Evaluated only when the pattern-based attempt fails.
    def fallbackDays: Int = {
      val millis = DateTimeUtils.stringToTime(s).getTime
      DateTimeUtils.millisToDays(millis)
    }
    Try(super.parse(s)).getOrElse(fallbackDays)
  }
}

/**
 * Factory for [[DateFormatter]] instances; the implementation is chosen by the
 * `legacyTimeParserEnabled` setting of the active `SQLConf`.
 */
object DateFormatter {
  def apply(format: String, timeZone: TimeZone, locale: Locale): DateFormatter = {
    val useLegacyParser = SQLConf.get.legacyTimeParserEnabled
    if (!useLegacyParser) {
      new Iso8601DateFormatter(format, timeZone, locale)
    } else {
      new LegacyFallbackDateFormatter(format, timeZone, locale)
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ object DateTimeUtils {
// Unit-conversion factors; each name reads as "<smaller unit>_PER_<larger unit>".
final val MILLIS_PER_SECOND = 1000L
final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L
final val MICROS_PER_DAY = MICROS_PER_SECOND * SECONDS_PER_DAY

// Number of nanoseconds in one microsecond.
final val NANOS_PER_MICROS = 1000L
final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L

// number of days in 400 years
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,13 @@ object SQLConf {
"a SparkConf entry.")
.booleanConf
.createWithDefault(true)

// Boolean switch (default false) that `SQLConf.legacyTimeParserEnabled` reads to choose
// between the legacy date/timestamp formatters and the java.time based ones.
val LEGACY_TIME_PARSER_ENABLED = buildConf("spark.sql.legacy.timeParser.enabled")
  .doc("When set to true, java.text.SimpleDateFormat is used for formatting and parsing " +
    "dates/timestamps in a locale-sensitive manner. When set to false, classes from " +
    "java.time.* packages are used for the same purpose.")
  .booleanConf
  .createWithDefault(false)
}

/**
Expand Down Expand Up @@ -2040,6 +2047,8 @@ class SQLConf extends Serializable with Logging {

def setCommandRejectsSparkConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CONFS)

/** Current value of the `SQLConf.LEGACY_TIME_PARSER_ENABLED` entry. */
def legacyTimeParserEnabled: Boolean = getConf(SQLConf.LEGACY_TIME_PARSER_ENABLED)

/** ********************** SQLConf functionality methods ************ */

/** Set Spark SQL configuration properties. */
Expand Down