From 2af65dea7bc8cb3248cdf14bdb1744a48e3c7fd3 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 21 Apr 2021 16:33:24 +0800 Subject: [PATCH 01/14] [SPARK-35111][SQL] Support Cast string to day-second interval --- .../spark/sql/catalyst/expressions/Cast.scala | 26 +++++++++++++++++++ .../sql/catalyst/util/IntervalUtils.scala | 18 +++++++++++++ .../sql/catalyst/expressions/CastSuite.scala | 13 ++++++++++ 3 files changed, 57 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 5d799c768af3..1e45fd4b2b2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -72,6 +72,7 @@ object Cast { case (TimestampType, DateType) => true case (StringType, CalendarIntervalType) => true + case (StringType, DayTimeIntervalType) => true case (StringType, _: NumericType) => true case (BooleanType, _: NumericType) => true @@ -533,6 +534,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit buildCast[UTF8String](_, s => IntervalUtils.safeStringToInterval(s)) } + private[this] def castToDayTimeInterval(from: DataType): Any => Any = from match { + case StringType => + buildCast[UTF8String](_, s => IntervalUtils.safeFromDayTimeString(s).orNull) + } + // LongConverter private[this] def castToLong(from: DataType): Any => Any = from match { case StringType if ansiEnabled => @@ -837,6 +843,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case decimal: DecimalType => castToDecimal(from, decimal) case TimestampType => castToTimestamp(from) case CalendarIntervalType => castToInterval(from) + case DayTimeIntervalType => castToDayTimeInterval(from) case BooleanType => castToBoolean(from) case ByteType => castToByte(from) case ShortType => castToShort(from) @@ -895,6 +902,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case decimal: DecimalType => castToDecimalCode(from, decimal, ctx) case TimestampType => castToTimestampCode(from, ctx) case CalendarIntervalType => castToIntervalCode(from) + case DayTimeIntervalType => castToDayTimeIntervalCode(from, ctx) case BooleanType => castToBooleanCode(from) case ByteType => castToByteCode(from, ctx) case ShortType => castToShortCode(from, ctx) @@ -1353,6 +1361,23 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } + private[this] def castToDayTimeIntervalCode( + from: DataType, + ctx: CodegenContext): CastFunction = from match { + case StringType => + val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") + val longOpt = ctx.freshVariable("intOpt", classOf[Option[Long]]) + (c, evPrim, evNull) => + code""" + scala.Option $longOpt = $util.safeFromDayTimeString($c); + if ($longOpt.isDefined()) { + $evPrim = ((Long) $longOpt.get()).longValue(); + } else { + $evNull = true; + } + """.stripMargin + } + private[this] def decimalToTimestampCode(d: ExprValue): Block = { val block = inline"new java.math.BigDecimal($MICROS_PER_SECOND)" code"($d.toBigDecimal().bigDecimal().multiply($block)).longValue()" @@ -1912,6 +1937,7 @@ object AnsiCast { case (DateType, TimestampType) => true case (StringType, _: CalendarIntervalType) => true + case (StringType, DayTimeIntervalType) => true case (StringType, DateType) => true case (TimestampType, DateType) => true diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index e52d3c881742..31186bb5d227 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -123,6 +123,24 @@ object IntervalUtils { } } + def safeFromDayTimeString(input: UTF8String): Option[Long] = { + try { + if (input == null || input.toString == null) { + throw new IllegalArgumentException("Interval day-second string must be not null") + } else { + val regex = "INTERVAL '([-|+]?[0-9]+-[-|+]?[0-9]+)' DAY TO SECOND".r + // scalastyle:off caselocale .toLowerCase + val intervalString = input.trimAll().toUpperCase.toString + // scalastyle:on + val interval = regex.findFirstMatchIn(intervalString) + .map(_.group(1)).getOrElse(intervalString) + Some(fromDayTimeString(interval).microseconds) + } + } catch { + case _: IllegalArgumentException => None + } + } + /** * Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn * diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 0554d073d1ab..09475ad6a2eb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1774,6 +1774,19 @@ class CastSuite extends CastSuiteBase { assert(e3.contains("Casting 2147483648 to int causes overflow")) } } + + test("SPARK-35112: Cast string to day-time interval") { + checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), + YearMonthIntervalType), 0) + checkEvaluation(cast(Literal.create("1 2:03:04"), YearMonthIntervalType), 0) + checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), + YearMonthIntervalType), -12) + checkEvaluation(cast(Literal.create("1 2:03:04"), YearMonthIntervalType), -12) + checkEvaluation(cast(Literal.create("INTERVAL '10 2:03:04' DAY TO SECOND"), + YearMonthIntervalType), 121) + checkEvaluation(cast(Literal.create("10 2:03:04"), YearMonthIntervalType), 121) + checkEvaluation(cast(Literal.create("null"), YearMonthIntervalType), null) + } } /** From b41fd25ad936ec354cb20e57adfb8314d9f6d6af Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 21 Apr 2021 17:27:46 +0800 Subject: [PATCH 02/14] fix --- .../sql/catalyst/util/IntervalUtils.scala | 2 +- .../sql/catalyst/expressions/CastSuite.scala | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 31186bb5d227..ea77876548d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -128,7 +128,7 @@ object IntervalUtils { if (input == null || input.toString == null) { throw new IllegalArgumentException("Interval day-second string must be not null") } else { - val regex = "INTERVAL '([-|+]?[0-9]+-[-|+]?[0-9]+)' DAY TO SECOND".r + val regex = "INTERVAL '([-|+]?[0-9]+ [0-9:]+)' DAY TO SECOND".r // scalastyle:off caselocale .toLowerCase val intervalString = input.trimAll().toUpperCase.toString 
// scalastyle:on diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 09475ad6a2eb..4d211397360e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1776,16 +1776,17 @@ class CastSuite extends CastSuiteBase { } test("SPARK-35112: Cast string to day-time interval") { + checkEvaluation(cast(Literal.create("INTERVAL '0 0:0:0' DAY TO SECOND"), + DayTimeIntervalType), 0L) + checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), - YearMonthIntervalType), 0) - checkEvaluation(cast(Literal.create("1 2:03:04"), YearMonthIntervalType), 0) - checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), - YearMonthIntervalType), -12) - checkEvaluation(cast(Literal.create("1 2:03:04"), YearMonthIntervalType), -12) - checkEvaluation(cast(Literal.create("INTERVAL '10 2:03:04' DAY TO SECOND"), - YearMonthIntervalType), 121) - checkEvaluation(cast(Literal.create("10 2:03:04"), YearMonthIntervalType), 121) - checkEvaluation(cast(Literal.create("null"), YearMonthIntervalType), null) + DayTimeIntervalType), 7384000000L) + checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 7384000000L) + checkEvaluation(cast(Literal.create("INTERVAL '-10 2:03:04' DAY TO SECOND"), + DayTimeIntervalType), -7384000000L) + + checkEvaluation(cast(Literal.create("-10 2:03:04"), DayTimeIntervalType), -7384000000L) + checkEvaluation(cast(Literal.create("null"), DayTimeIntervalType), null) } } From a82bfeeaf36b19802caf760b127ff0eb72208d34 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 28 Apr 2021 22:17:39 +0800 Subject: [PATCH 03/14] update --- .../spark/sql/catalyst/expressions/Cast.scala | 4 +- .../sql/catalyst/util/IntervalUtils.scala | 48 +++++++++++++------ 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 1e45fd4b2b2f..b375530b32eb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -536,7 +536,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDayTimeInterval(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, s => IntervalUtils.safeFromDayTimeString(s).orNull) + buildCast[UTF8String](_, s => IntervalUtils.castStringToDTInterval(s).microseconds) } // LongConverter @@ -1369,7 +1369,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val longOpt = ctx.freshVariable("intOpt", classOf[Option[Long]]) (c, evPrim, evNull) => code""" - scala.Option $longOpt = $util.safeFromDayTimeString($c); + scala.Option $longOpt = $util.castStringToDTInterval($c).microseconds; if ($longOpt.isDefined()) { $evPrim = ((Long) $longOpt.get()).longValue(); } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index ea77876548d2..785e5de31426 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.util import java.time.{Duration, Period} import java.time.temporal.ChronoUnit +import java.util.Locale import java.util.concurrent.TimeUnit import scala.util.control.NonFatal @@ -123,22 +124,39 @@ object IntervalUtils { } } - def safeFromDayTimeString(input: UTF8String): Option[Long] = { - try { - if (input == null || input.toString == null) { - throw new IllegalArgumentException("Interval day-second string must be not null") - } else { - val regex = "INTERVAL '([-|+]?[0-9]+ [0-9:]+)' DAY TO SECOND".r - // scalastyle:off caselocale .toLowerCase - val intervalString = input.trimAll().toUpperCase.toString - // scalastyle:on - val interval = regex.findFirstMatchIn(intervalString) - .map(_.group(1)).getOrElse(intervalString) - Some(fromDayTimeString(interval).microseconds) - } - } catch { - case _: IllegalArgumentException => None + private val daySecondStringPattern = + ("(INTERVAL )?([+|-])*?(')?" + + "([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?(')?( DAY TO SECOND)?").r + + def castStringToDTInterval(input: UTF8String): CalendarInterval = { + // scalastyle:off caselocale .toUpperCase + input.trimAll().toString.toUpperCase(Locale.ROOT) match { + case daySecondStringPattern("INTERVAL ", prefixSign, "'", + suffixSign, day, hour, minute, second, micro, "'", " DAY TO SECOND") => + (prefixSign, suffixSign) match { + case ("-", "-") => fromYearMonthString(s"$day $hour:$minute:$second.$micro") + case ("-", _) => fromYearMonthString(s"-$day $hour:$minute:$second.$micro") + case (_, _) if suffixSign != null => + fromYearMonthString(s"$suffixSign$day $hour:$minute:$second.$micro") + case (_, _) => fromYearMonthString(s"$day $hour:$minute:$second.$micro") + } + case daySecondStringPattern( + "INTERVAL ", null, "'", "-", day, hour, minute, second, micro, "'", " DAY TO SECOND") => + fromYearMonthString(s"-$day $hour:$minute:$second.$micro") + case daySecondStringPattern( + "INTERVAL ", null, "'", _, day, hour, minute, second, micro, "'", " DAY TO SECOND") => + fromYearMonthString(s"$day $hour:$minute:$second.$micro") + case daySecondStringPattern( + null, null, null, "-", day, hour, minute, second, micro, null, null) => + fromYearMonthString(s"-$day $hour:$minute:$second.$micro") + case daySecondStringPattern( + null, null, null, _, day, hour, minute, second, micro, null, null) => + fromYearMonthString(s"$day $hour:$minute:$second.$micro") + case daySecondStringPattern(_, _, _, _, _, _, _, _, _, _, _) => + throw new IllegalArgumentException( + s"Interval string does not match year-month format of 'y-m': ${input.toString}") } + // scalastyle:on } /** From e0b3b30c3f96ddaa1f48668dfdd3f555182d8221 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 29 Apr 2021 14:11:10 +0800 Subject: [PATCH 04/14] save --- .../sql/catalyst/util/IntervalUtils.scala | 44 ++++++++----------- .../sql/catalyst/expressions/CastSuite.scala | 10 +++++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 785e5de31426..5b1138a00251 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala 
@@ -124,39 +124,31 @@ object IntervalUtils { } } - private val daySecondStringPattern = - ("(INTERVAL )?([+|-])*?(')?" + - "([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?(')?( DAY TO SECOND)?").r + private val daySecondStringPattern = ("^(INTERVAL\\s+)([+|-])?(')" + + "([+|-])?(\\d+ )?(\\d+:)?(\\d+):(\\d+)(\\.\\d+)?(')(\\s+DAY TO SECOND)$").r + private val daySecondNumPattern = "([+|-])?(\\d+ )?(\\d+:)?(\\d+):(\\d+)(\\.\\d+)?".r def castStringToDTInterval(input: UTF8String): CalendarInterval = { - // scalastyle:off caselocale .toUpperCase - input.trimAll().toString.toUpperCase(Locale.ROOT) match { - case daySecondStringPattern("INTERVAL ", prefixSign, "'", - suffixSign, day, hour, minute, second, micro, "'", " DAY TO SECOND") => + val intervalStr = input.trimAll().toString.toUpperCase(Locale.ROOT) + intervalStr match { + case dayTimePatternLegacy(_, _, _, _, _, _) => + fromDayTimeString(intervalStr) + case daySecondStringPattern(_, prefixSign, _, suffixSign, _, _, _, _, _, _, _) => + val dtStr = + "^([+|-])".r.replaceAllIn(daySecondNumPattern.findFirstIn(intervalStr).get, "") (prefixSign, suffixSign) match { - case ("-", "-") => fromYearMonthString(s"$day $hour:$minute:$second.$micro") - case ("-", _) => fromYearMonthString(s"-$day $hour:$minute:$second.$micro") - case (_, _) if suffixSign != null => - fromYearMonthString(s"$suffixSign$day $hour:$minute:$second.$micro") - case (_, _) => fromYearMonthString(s"$day $hour:$minute:$second.$micro") + case ("-", "-") => fromDayTimeString(dtStr) + case ("-", _) => fromDayTimeString(s"-$dtStr") + case (_, _) => fromDayTimeString(dtStr) } - case daySecondStringPattern( - "INTERVAL ", null, "'", "-", day, hour, minute, second, micro, "'", " DAY TO SECOND") => - fromYearMonthString(s"-$day $hour:$minute:$second.$micro") - case daySecondStringPattern( - "INTERVAL ", null, "'", _, day, hour, minute, second, micro, "'", " DAY TO SECOND") => - fromYearMonthString(s"$day $hour:$minute:$second.$micro") - case daySecondStringPattern( - null, null, null, "-", day, hour, minute, second, micro, null, null) => - fromYearMonthString(s"-$day $hour:$minute:$second.$micro") - case daySecondStringPattern( - null, null, null, _, day, hour, minute, second, micro, null, null) => - fromYearMonthString(s"$day $hour:$minute:$second.$micro") + case daySecondStringPattern(_, null, _, _, _, _, _, _, _, _, _) => + val dtStr = daySecondNumPattern.findFirstIn(intervalStr).get + fromDayTimeString(dtStr) case daySecondStringPattern(_, _, _, _, _, _, _, _, _, _, _) => throw new IllegalArgumentException( - s"Interval string does not match year-month format of 'y-m': ${input.toString}") + s"Interval string must match day-time format of 'd h:m:s.n': ${input.toString}, " + + s"$fallbackNotice") } - // scalastyle:on } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 4d211397360e..e6a0715c4296 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1781,6 +1781,16 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), DayTimeIntervalType), 7384000000L) + checkEvaluation(cast(Literal.create("INTERVAL '2:03:04' DAY TO SECOND"), + DayTimeIntervalType), 7384000000L) + 
checkEvaluation(cast(Literal.create("INTERVAL '03:04' DAY TO SECOND"), + DayTimeIntervalType), 7384000000L) + checkEvaluation(cast(Literal.create("INTERVAL '03:04.0000' DAY TO SECOND"), + DayTimeIntervalType), 7384000000L) + checkEvaluation(cast(Literal.create("INTERVAL '01:03:04.0000' DAY TO SECOND"), + DayTimeIntervalType), 7384000000L) + checkEvaluation(cast(Literal.create("INTERVAL '1 1:03:04.0000' DAY TO SECOND"), + DayTimeIntervalType), 7384000000L) checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 7384000000L) checkEvaluation(cast(Literal.create("INTERVAL '-10 2:03:04' DAY TO SECOND"), DayTimeIntervalType), -7384000000L) From 616380e52783590d616159f8e871d3950cbd8890 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 29 Apr 2021 15:39:19 +0800 Subject: [PATCH 05/14] save --- .../spark/sql/catalyst/expressions/Cast.scala | 16 +++------------- .../spark/sql/catalyst/util/IntervalUtils.scala | 15 ++++++++------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index b375530b32eb..e0e9e4559fd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -902,7 +902,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case decimal: DecimalType => castToDecimalCode(from, decimal, ctx) case TimestampType => castToTimestampCode(from, ctx) case CalendarIntervalType => castToIntervalCode(from) - case DayTimeIntervalType => castToDayTimeIntervalCode(from, ctx) + case DayTimeIntervalType => castToDayTimeIntervalCode(from) case BooleanType => castToBooleanCode(from) case ByteType => castToByteCode(from, ctx) case ShortType => castToShortCode(from, ctx) @@ -1361,21 +1361,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } - private[this] def castToDayTimeIntervalCode( - from: DataType, - ctx: CodegenContext): CastFunction = from match { + private[this] def castToDayTimeIntervalCode(from: DataType): CastFunction = from match { case StringType => val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") - val longOpt = ctx.freshVariable("intOpt", classOf[Option[Long]]) (c, evPrim, evNull) => - code""" - scala.Option $longOpt = $util.castStringToDTInterval($c).microseconds; - if ($longOpt.isDefined()) { - $evPrim = ((Long) $longOpt.get()).longValue(); - } else { - $evNull = true; - } - """.stripMargin + code"$evPrim = $util.castStringToDTInterval($c).microseconds;" } private[this] def decimalToTimestampCode(d: ExprValue): Block = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 5b1138a00251..2d12a764f038 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -131,19 +131,20 @@ object IntervalUtils { def castStringToDTInterval(input: UTF8String): CalendarInterval = { val intervalStr = input.trimAll().toString.toUpperCase(Locale.ROOT) intervalStr match { - case dayTimePatternLegacy(_, _, _, _, _, _) => - fromDayTimeString(intervalStr) - case daySecondStringPattern(_, prefixSign, _, suffixSign, _, _, _, _, _, _, _) => + case 
daySecondNumPattern(_, _, _, _, _, _) => + fromDayTimeString(intervalStr, DAY, SECOND) + case daySecondNumPattern(_, prefixSign, _, suffixSign, _, _, _, _, _, _, _) => val dtStr = "^([+|-])".r.replaceAllIn(daySecondNumPattern.findFirstIn(intervalStr).get, "") (prefixSign, suffixSign) match { - case ("-", "-") => fromDayTimeString(dtStr) - case ("-", _) => fromDayTimeString(s"-$dtStr") - case (_, _) => fromDayTimeString(dtStr) + case ("-", "-") => fromDayTimeString(dtStr, DAY, SECOND) + case ("-", _) => fromDayTimeString(s"-$dtStr", DAY, SECOND) + case (_, _) => fromDayTimeString(dtStr, DAY, SECOND) } case daySecondStringPattern(_, null, _, _, _, _, _, _, _, _, _) => val dtStr = daySecondNumPattern.findFirstIn(intervalStr).get - fromDayTimeString(dtStr) + println(dtStr) + fromDayTimeString(dtStr, DAY, SECOND) case daySecondStringPattern(_, _, _, _, _, _, _, _, _, _, _) => throw new IllegalArgumentException( s"Interval string must match day-time format of 'd h:m:s.n': ${input.toString}, " + From 35dd9f45aa41d7a56de78f20b713dee63cbc4b8a Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 29 Apr 2021 16:10:33 +0800 Subject: [PATCH 06/14] update --- .../spark/sql/catalyst/util/IntervalUtils.scala | 3 +-- .../sql/catalyst/expressions/CastSuite.scala | 15 ++++----------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 2d12a764f038..eff1774008f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -143,9 +143,8 @@ object IntervalUtils { } case daySecondStringPattern(_, null, _, _, _, _, _, _, _, _, _) => val dtStr = daySecondNumPattern.findFirstIn(intervalStr).get - println(dtStr) fromDayTimeString(dtStr, DAY, SECOND) - case daySecondStringPattern(_, _, _, _, _, _, _, _, _, _, _) => + case _ => throw new IllegalArgumentException( s"Interval string must match day-time format of 'd h:m:s.n': ${input.toString}, " + s"$fallbackNotice") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index e6a0715c4296..add193024c5f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1781,22 +1781,15 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), DayTimeIntervalType), 7384000000L) - checkEvaluation(cast(Literal.create("INTERVAL '2:03:04' DAY TO SECOND"), - DayTimeIntervalType), 7384000000L) - checkEvaluation(cast(Literal.create("INTERVAL '03:04' DAY TO SECOND"), - DayTimeIntervalType), 7384000000L) - checkEvaluation(cast(Literal.create("INTERVAL '03:04.0000' DAY TO SECOND"), - DayTimeIntervalType), 7384000000L) - checkEvaluation(cast(Literal.create("INTERVAL '01:03:04.0000' DAY TO SECOND"), - DayTimeIntervalType), 7384000000L) - checkEvaluation(cast(Literal.create("INTERVAL '1 1:03:04.0000' DAY TO SECOND"), - DayTimeIntervalType), 7384000000L) + checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00' DAY TO SECOND"), + DayTimeIntervalType), 11040000000L) + 
checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00.0000' DAY TO SECOND"), + DayTimeIntervalType), 11040000000L) checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 7384000000L) checkEvaluation(cast(Literal.create("INTERVAL '-10 2:03:04' DAY TO SECOND"), DayTimeIntervalType), -7384000000L) checkEvaluation(cast(Literal.create("-10 2:03:04"), DayTimeIntervalType), -7384000000L) - checkEvaluation(cast(Literal.create("null"), DayTimeIntervalType), null) } } From daa96753ae2292077cd8ac53b397ef53c5acb739 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 29 Apr 2021 17:03:59 +0800 Subject: [PATCH 07/14] done --- .../spark/sql/catalyst/util/IntervalUtils.scala | 17 ++++++++--------- .../sql/catalyst/expressions/CastSuite.scala | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index eff1774008f9..e48754d31e3f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -125,25 +125,24 @@ object IntervalUtils { } private val daySecondStringPattern = ("^(INTERVAL\\s+)([+|-])?(')" + - "([+|-])?(\\d+ )?(\\d+:)?(\\d+):(\\d+)(\\.\\d+)?(')(\\s+DAY TO SECOND)$").r - private val daySecondNumPattern = "([+|-])?(\\d+ )?(\\d+:)?(\\d+):(\\d+)(\\.\\d+)?".r + "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?(')(\\s+DAY\\s+TO\\s+SECOND)$").r + private val daySecondPattern = "^([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?$".r def castStringToDTInterval(input: UTF8String): CalendarInterval = { val intervalStr = input.trimAll().toString.toUpperCase(Locale.ROOT) + val ansiDaySecondPattern = + "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?".r intervalStr match { - case daySecondNumPattern(_, _, _, _, _, _) => - fromDayTimeString(intervalStr, DAY, SECOND) - case daySecondNumPattern(_, prefixSign, _, suffixSign, _, _, _, _, _, _, _) => + case daySecondPattern(_, _, _, _, _, _) => fromDayTimeString(intervalStr, DAY, SECOND) + case daySecondStringPattern(_, prefixSign, _, suffixSign, _, _, _, _, _, _, _) => val dtStr = - "^([+|-])".r.replaceAllIn(daySecondNumPattern.findFirstIn(intervalStr).get, "") + "^([+|-])".r.replaceAllIn(ansiDaySecondPattern.findFirstIn(intervalStr).get, "") (prefixSign, suffixSign) match { case ("-", "-") => fromDayTimeString(dtStr, DAY, SECOND) case ("-", _) => fromDayTimeString(s"-$dtStr", DAY, SECOND) + case (_, "-") => fromDayTimeString(s"-$dtStr", DAY, SECOND) case (_, _) => fromDayTimeString(dtStr, DAY, SECOND) } - case daySecondStringPattern(_, null, _, _, _, _, _, _, _, _, _) => - val dtStr = daySecondNumPattern.findFirstIn(intervalStr).get - fromDayTimeString(dtStr, DAY, SECOND) case _ => throw new IllegalArgumentException( s"Interval string must match day-time format of 'd h:m:s.n': ${input.toString}, " + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index add193024c5f..e88091b7b1ba 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1776,9 +1776,9 @@ class CastSuite extends CastSuiteBase { } test("SPARK-35112: Cast string to 
day-time interval") { + checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '0 0:0:0' DAY TO SECOND"), DayTimeIntervalType), 0L) - checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), DayTimeIntervalType), 7384000000L) checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00' DAY TO SECOND"), From c2c702198416aa7fa26553302ff81c597b35c224 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 30 Apr 2021 14:57:49 +0800 Subject: [PATCH 08/14] update --- .../spark/sql/catalyst/util/IntervalUtils.scala | 8 ++++---- .../spark/sql/catalyst/expressions/CastSuite.scala | 11 ++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index e48754d31e3f..b0710b5600c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.util import java.time.{Duration, Period} import java.time.temporal.ChronoUnit -import java.util.Locale import java.util.concurrent.TimeUnit import scala.util.control.NonFatal @@ -124,12 +123,12 @@ object IntervalUtils { } } - private val daySecondStringPattern = ("^(INTERVAL\\s+)([+|-])?(')" + + private val daySecondStringPattern = ("(?i)^(INTERVAL\\s+)([+|-])?(')" + "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?(')(\\s+DAY\\s+TO\\s+SECOND)$").r private val daySecondPattern = "^([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?$".r def castStringToDTInterval(input: UTF8String): CalendarInterval = { - val intervalStr = input.trimAll().toString.toUpperCase(Locale.ROOT) + val intervalStr = input.trimAll().toString val ansiDaySecondPattern = "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?".r intervalStr match { @@ -145,7 +144,8 @@ object IntervalUtils { } case _ => throw new IllegalArgumentException( - s"Interval string must match day-time format of 'd h:m:s.n': ${input.toString}, " + + s"Interval string must match day-time format of `d h:m:s.n` " + + s"or `INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND`: ${input.toString}, " + s"$fallbackNotice") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index e88091b7b1ba..f62e0e798ef3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1788,8 +1788,17 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 7384000000L) checkEvaluation(cast(Literal.create("INTERVAL '-10 2:03:04' DAY TO SECOND"), DayTimeIntervalType), -7384000000L) - checkEvaluation(cast(Literal.create("-10 2:03:04"), DayTimeIntervalType), -7384000000L) + checkEvaluation(cast(Literal.create("-106751991 04:00:54.775808"), DayTimeIntervalType), + -14454775808L) + checkEvaluation(cast(Literal.create("106751991 04:00:54.775807"), DayTimeIntervalType), + 14454775807L) + + Seq(Byte.MaxValue, Short.MaxValue, Int.MaxValue, Long.MaxValue, Long.MinValue + 1, + Long.MinValue).foreach { 
period => + val interval = Literal.create(Duration.of(period, ChronoUnit.MICROS), DayTimeIntervalType) + checkEvaluation(cast(cast(interval, StringType), DayTimeIntervalType), period) + } } } From 700efa4260eb9c4b8dae2ae241ed9980db517474 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 30 Apr 2021 15:33:43 +0800 Subject: [PATCH 09/14] update --- .../spark/sql/catalyst/expressions/Cast.scala | 4 +- .../sql/catalyst/util/IntervalUtils.scala | 8 +++- .../sql/catalyst/expressions/CastSuite.scala | 41 +++++++++++++------ 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e0e9e4559fd6..fb0cf26a522c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -536,7 +536,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDayTimeInterval(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, s => IntervalUtils.castStringToDTInterval(s).microseconds) + buildCast[UTF8String](_, s => IntervalUtils.castStringToDTInterval(s)) } // LongConverter @@ -1365,7 +1365,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case StringType => val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") (c, evPrim, evNull) => - code"$evPrim = $util.castStringToDTInterval($c).microseconds;" + code"$evPrim = $util.castStringToDTInterval($c);" } private[this] def decimalToTimestampCode(d: ExprValue): Block = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index b0710b5600c6..d33bc80c7e25 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -127,11 +127,14 @@ object IntervalUtils { "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?(')(\\s+DAY\\s+TO\\s+SECOND)$").r private val daySecondPattern = "^([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?$".r - def castStringToDTInterval(input: UTF8String): CalendarInterval = { + def castStringToDTInterval(input: UTF8String): Long = { + def calendarToMicros(calendar: CalendarInterval): Long = { + getDuration(calendar, TimeUnit.MICROSECONDS) + } val intervalStr = input.trimAll().toString val ansiDaySecondPattern = "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?".r - intervalStr match { + val calendar = intervalStr match { case daySecondPattern(_, _, _, _, _, _) => fromDayTimeString(intervalStr, DAY, SECOND) case daySecondStringPattern(_, prefixSign, _, suffixSign, _, _, _, _, _, _, _) => val dtStr = @@ -148,6 +151,7 @@ object IntervalUtils { s"or `INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND`: ${input.toString}, " + s"$fallbackNotice") } + calendarToMicros(calendar) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index f62e0e798ef3..013331c473c9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -32,8 +32,8 @@ import org.apache.spark.sql.catalyst.analysis.TypeCoercion.numericPrecedence import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration @@ -1776,28 +1776,45 @@ class CastSuite extends CastSuiteBase { } test("SPARK-35112: Cast string to day-time interval") { + val interval = IntervalUtils.fromDayTimeString("106751991 04:00:54.775807") checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '0 0:0:0' DAY TO SECOND"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), - DayTimeIntervalType), 7384000000L) + DayTimeIntervalType), 93784000000L) checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00' DAY TO SECOND"), - DayTimeIntervalType), 11040000000L) + DayTimeIntervalType), 97440000000L) checkEvaluation(cast(Literal.create("INTERVAL '1 03:04:00.0000' DAY TO SECOND"), - DayTimeIntervalType), 11040000000L) - checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 7384000000L) + DayTimeIntervalType), 97440000000L) + checkEvaluation(cast(Literal.create("1 2:03:04"), DayTimeIntervalType), 93784000000L) checkEvaluation(cast(Literal.create("INTERVAL '-10 2:03:04' DAY TO SECOND"), - DayTimeIntervalType), -7384000000L) - checkEvaluation(cast(Literal.create("-10 2:03:04"), DayTimeIntervalType), -7384000000L) + DayTimeIntervalType), -871384000000L) + checkEvaluation(cast(Literal.create("-10 2:03:04"), DayTimeIntervalType), -871384000000L) checkEvaluation(cast(Literal.create("-106751991 04:00:54.775808"), DayTimeIntervalType), - -14454775808L) + Long.MinValue) checkEvaluation(cast(Literal.create("106751991 04:00:54.775807"), DayTimeIntervalType), - 14454775807L) + Long.MaxValue) + + Seq("-106751991 04:00:54.775808", "106751991 04:00:54.775807").foreach { interval => + val ansiInterval = s"INTERVAL '$interval' DAY TO SECOND" + checkEvaluation( + cast(cast(Literal.create(interval), DayTimeIntervalType), StringType), ansiInterval) + checkEvaluation(cast(cast(Literal.create(ansiInterval), + DayTimeIntervalType), StringType), ansiInterval) + } + + Seq("INTERVAL '-106751991 04:00:54.775809' YEAR TO MONTH", + "INTERVAL '106751991 04:00:54.775808' YEAR TO MONTH").foreach { interval => + val e = intercept[IllegalArgumentException] { + cast(Literal.create(interval), DayTimeIntervalType).eval() + }.getMessage + assert(e.contains("Interval string must match day-time format of")) + } Seq(Byte.MaxValue, Short.MaxValue, Int.MaxValue, Long.MaxValue, Long.MinValue + 1, - Long.MinValue).foreach { period => - val interval = Literal.create(Duration.of(period, ChronoUnit.MICROS), DayTimeIntervalType) - checkEvaluation(cast(cast(interval, StringType), DayTimeIntervalType), period) + Long.MinValue).foreach { duration => + val interval = Literal.create(Duration.of(duration, ChronoUnit.MICROS), DayTimeIntervalType) + 
checkEvaluation(cast(cast(interval, StringType), DayTimeIntervalType), duration) } } } From 9fe4c387f1b1f22fa741bb7a94af61d269836b05 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Sat, 1 May 2021 10:45:23 +0800 Subject: [PATCH 10/14] save --- .../spark/sql/catalyst/expressions/Cast.scala | 9 +-- .../sql/catalyst/util/IntervalUtils.scala | 75 ++++++++++++------- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index a38488dce833..43f85efb9207 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -537,13 +537,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } private[this] def castToDayTimeInterval(from: DataType): Any => Any = from match { - case StringType => - buildCast[UTF8String](_, s => IntervalUtils.castStringToDTInterval(s)) + case StringType => buildCast[UTF8String](_, s => IntervalUtils.castStringToDTInterval(s)) } private[this] def castToYearMonthInterval(from: DataType): Any => Any = from match { - case StringType => - buildCast[UTF8String](_, s => IntervalUtils.castStringToYMInterval(s)) + case StringType => buildCast[UTF8String](_, s => IntervalUtils.castStringToYMInterval(s)) } // LongConverter @@ -1373,8 +1371,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDayTimeIntervalCode(from: DataType): CastFunction = from match { case StringType => val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") - (c, evPrim, evNull) => - code"$evPrim = $util.castStringToDTInterval($c);" + (c, evPrim, evNull) => code"$evPrim = $util.castStringToDTInterval($c);" } private[this] def castToYearMonthIntervalCode(from: DataType): CastFunction = from match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 245c51b6b22f..e41d9103dcaf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -138,27 +138,48 @@ object IntervalUtils { } } + def toYMInterval(yearStr: String, monthStr: String, sign: Int): Int = { + try { + val years = toLongWithRange(YEAR, yearStr, 0, Integer.MAX_VALUE / MONTHS_PER_YEAR) + val totalMonths = sign * (years * MONTHS_PER_YEAR + toLongWithRange(MONTH, monthStr, 0, 11)) + Math.toIntExact(totalMonths) + } catch { + case NonFatal(e) => + throw new IllegalArgumentException( + s"Error parsing interval year-month string: ${e.getMessage}", e) + } + } + private val daySecondStringPattern = ("(?i)^(INTERVAL\\s+)([+|-])?(')" + "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?(')(\\s+DAY\\s+TO\\s+SECOND)$").r private val daySecondPattern = "^([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?$".r def castStringToDTInterval(input: UTF8String): Long = { - def calendarToMicros(calendar: CalendarInterval): Long = { - getDuration(calendar, TimeUnit.MICROSECONDS) + def secondAndMicro(second: String, micro: String): String = { + if (micro != null) { + s"$second$micro" + } else { + second + } } +// val regex = dayTimePattern(DAY -> SECOND) val intervalStr = input.trimAll().toString - val 
ansiDaySecondPattern = - "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?".r - val calendar = intervalStr match { - case daySecondPattern(_, _, _, _, _, _) => fromDayTimeString(intervalStr, DAY, SECOND) - case daySecondStringPattern(_, prefixSign, _, suffixSign, _, _, _, _, _, _, _) => - val dtStr = - "^([+|-])".r.replaceAllIn(ansiDaySecondPattern.findFirstIn(intervalStr).get, "") - (prefixSign, suffixSign) match { - case ("-", "-") => fromDayTimeString(dtStr, DAY, SECOND) - case ("-", _) => fromDayTimeString(s"-$dtStr", DAY, SECOND) - case (_, "-") => fromDayTimeString(s"-$dtStr", DAY, SECOND) - case (_, _) => fromDayTimeString(dtStr, DAY, SECOND) + intervalStr match { +// case regex("-", day, hour, minute, secondPer) => +// toDTInterval(day, hour, minute, secondPer, -1) +// case regex(_, day, hour, minute, secondPer) => +// toDTInterval(day, hour, minute, secondPer, 1) + case daySecondPattern("-", day, hour, minute, second, micro) => + toDTInterval(day, hour, minute, secondAndMicro(second, micro), -1) + case daySecondPattern(_, day, hour, minute, second, micro) => + toDTInterval(day, hour, minute, secondAndMicro(second, micro), 1) + case daySecondStringPattern( + _, firstSign, _, secondSign, day, hour, minute, second, micro, _, _) => + (firstSign, secondSign) match { + case ("-", "-") => toDTInterval(day, hour, minute, secondAndMicro(second, micro), 1) + case ("-", _) => toDTInterval(day, hour, minute, secondAndMicro(second, micro), -1) + case (_, "-") => toDTInterval(day, hour, minute, secondAndMicro(second, micro), -1) + case (_, _) => toDTInterval(day, hour, minute, secondAndMicro(second, micro), 1) } case _ => throw new IllegalArgumentException( @@ -166,19 +187,23 @@ object IntervalUtils { s"or `INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND`: ${input.toString}, " + s"$fallbackNotice") } - calendarToMicros(calendar) } - def toYMInterval(yearStr: String, monthStr: String, sign: Int): Int = { - try { - val years = toLongWithRange(YEAR, yearStr, 0, Integer.MAX_VALUE / MONTHS_PER_YEAR) - val totalMonths = sign * (years * MONTHS_PER_YEAR + toLongWithRange(MONTH, monthStr, 0, 11)) - Math.toIntExact(totalMonths) - } catch { - case NonFatal(e) => - throw new IllegalArgumentException( - s"Error parsing interval year-month string: ${e.getMessage}", e) - } + def toDTInterval( + dayStr: String, + hourStr: String, + minuteStr: String, + secondStr: String, + sign: Int): Long = { + var micros = 0L + val days = toLongWithRange(DAY, dayStr, 0, Int.MaxValue).toInt + micros = Math.addExact(micros, sign * days * MICROS_PER_DAY) + val hours = toLongWithRange(HOUR, hourStr, 0, 23) + micros = Math.addExact(micros, sign * hours * MICROS_PER_HOUR) + val minutes = toLongWithRange(MINUTE, minuteStr, 0, 59) + micros = Math.addExact(micros, sign * minutes * MICROS_PER_MINUTE) + micros = Math.addExact(micros, sign * parseSecondNano(secondStr)) + micros } /** From 46fff9c8a913f0556844ebd5560acc0917eff438 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Sat, 1 May 2021 10:59:59 +0800 Subject: [PATCH 11/14] Update CastSuite.scala --- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 5145efe4055d..8e11ccfac12e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -32,8 +32,8 @@ import org.apache.spark.sql.catalyst.analysis.TypeCoercion.numericPrecedence import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration @@ -1776,7 +1776,6 @@ class CastSuite extends CastSuiteBase { } test("SPARK-35112: Cast string to day-time interval") { - val interval = IntervalUtils.fromDayTimeString("106751991 04:00:54.775807") checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '0 0:0:0' DAY TO SECOND"), DayTimeIntervalType), 0L) From 50addb479104c378a4f6873f1343fb99a9c8c3e9 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Sat, 1 May 2021 17:35:43 +0800 Subject: [PATCH 12/14] Update IntervalUtils.scala --- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index e41d9103dcaf..7bfe95c19b2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -162,13 +162,8 @@ object IntervalUtils { second } } -// val regex = dayTimePattern(DAY -> SECOND) val intervalStr = input.trimAll().toString intervalStr match { -// case regex("-", day, hour, minute, secondPer) => -// toDTInterval(day, hour, minute, secondPer, -1) -// case regex(_, day, hour, minute, secondPer) => -// toDTInterval(day, hour, minute, secondPer, 1) case daySecondPattern("-", day, hour, minute, second, micro) => toDTInterval(day, hour, minute, secondAndMicro(second, micro), -1) case daySecondPattern(_, day, hour, minute, second, micro) => From 4f8fc78d8a48381940ba89b20e3c3ba810c86113 Mon Sep 17 00:00:00 2001 From: AngersZhuuuu Date: Sat, 1 May 2021 23:01:16 +0800 Subject: [PATCH 13/14] Update sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala Co-authored-by: Maxim Gekk --- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 43f85efb9207..c52578d91368 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1371,7 +1371,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDayTimeIntervalCode(from: DataType): CastFunction = from match { case StringType => val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") - (c, evPrim, evNull) => code"$evPrim = $util.castStringToDTInterval($c);" + (c, evPrim, 
_) => code"$evPrim = $util.castStringToDTInterval($c);" } private[this] def castToYearMonthIntervalCode(from: DataType): CastFunction = from match { From 4e2608c92809bc1e1e08271c98060592e3861728 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Sun, 2 May 2021 12:06:12 +0800 Subject: [PATCH 14/14] Done --- .../sql/catalyst/util/IntervalUtils.scala | 19 ++++++++++--------- .../sql/catalyst/expressions/CastSuite.scala | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 7bfe95c19b2e..f08f77ac2812 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -150,9 +150,11 @@ object IntervalUtils { } } - private val daySecondStringPattern = ("(?i)^(INTERVAL\\s+)([+|-])?(')" + - "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?(')(\\s+DAY\\s+TO\\s+SECOND)$").r - private val daySecondPattern = "^([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?$".r + private val unquotedDaySecondPattern = + "([+|-])?(\\d+) (\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.\\d{1,9})?" + private val quotedDaySecondPattern = (s"^$unquotedDaySecondPattern$$").r + private val daySecondLiteralPattern = + (s"(?i)^INTERVAL\\s+([+|-])?\\'$unquotedDaySecondPattern\\'\\s+DAY\\s+TO\\s+SECOND$$").r def castStringToDTInterval(input: UTF8String): Long = { def secondAndMicro(second: String, micro: String): String = { @@ -162,14 +164,13 @@ object IntervalUtils { second } } - val intervalStr = input.trimAll().toString - intervalStr match { - case daySecondPattern("-", day, hour, minute, second, micro) => + + input.trimAll().toString match { + case quotedDaySecondPattern("-", day, hour, minute, second, micro) => toDTInterval(day, hour, minute, secondAndMicro(second, micro), -1) - case daySecondPattern(_, day, hour, minute, second, micro) => + case quotedDaySecondPattern(_, day, hour, minute, second, micro) => toDTInterval(day, hour, minute, secondAndMicro(second, micro), 1) - case daySecondStringPattern( - _, firstSign, _, secondSign, day, hour, minute, second, micro, _, _) => + case daySecondLiteralPattern(firstSign, secondSign, day, hour, minute, second, micro) => (firstSign, secondSign) match { case ("-", "-") => toDTInterval(day, hour, minute, secondAndMicro(second, micro), 1) case ("-", _) => toDTInterval(day, hour, minute, secondAndMicro(second, micro), -1) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 8e11ccfac12e..cf7be470265f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1777,7 +1777,7 @@ class CastSuite extends CastSuiteBase { test("SPARK-35112: Cast string to day-time interval") { checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType), 0L) - checkEvaluation(cast(Literal.create("INTERVAL '0 0:0:0' DAY TO SECOND"), + checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second "), DayTimeIntervalType), 0L) checkEvaluation(cast(Literal.create("INTERVAL '1 2:03:04' DAY TO SECOND"), DayTimeIntervalType), 93784000000L)
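
Note: the sketch below is a standalone, dependency-free Scala illustration of the parsing approach the series converges on in the final revision of IntervalUtils.castStringToDTInterval: one regex for the unquoted `d h:m:s[.n]` form, one for the ANSI `INTERVAL [+|-]'[+|-]d h:m:s[.n]' DAY TO SECOND` literal, with the outer and inner signs multiplied and the result accumulated as microseconds. The object name, constants, and the fractional-second truncation used here are assumptions made for illustration only and are not taken verbatim from the Spark codebase.

// Standalone sketch (assumed names/constants): parse a day-time interval
// string into microseconds, mirroring the shape of the final
// castStringToDTInterval but using only the JDK and the Scala library.
object DayTimeIntervalSketch {
  // Microseconds per unit (assumed constants, not imported from Spark).
  private val MicrosPerSecond = 1000000L
  private val MicrosPerMinute = 60L * MicrosPerSecond
  private val MicrosPerHour   = 60L * MicrosPerMinute
  private val MicrosPerDay    = 24L * MicrosPerHour

  // Unquoted form: [+|-]d h:m:s[.n]
  private val unquoted = """([+-])?(\d+) (\d{1,2}):(\d{1,2}):(\d{1,2})(\.\d{1,9})?""".r
  // ANSI literal form: INTERVAL [+|-]'[+|-]d h:m:s[.n]' DAY TO SECOND (case-insensitive).
  private val literal =
    ("""(?i)INTERVAL\s+([+-])?'([+-])?(\d+) (\d{1,2}):(\d{1,2}):(\d{1,2})""" +
      """(\.\d{1,9})?'\s+DAY\s+TO\s+SECOND""").r

  private def toMicros(day: String, hour: String, min: String, sec: String,
      frac: String, sign: Long): Long = {
    // Fractional seconds: right-pad with zeros and keep 6 digits => microseconds
    // (digits beyond microsecond precision are simply truncated in this sketch).
    val fracMicros = if (frac == null) 0L else (frac.drop(1) + "000000").take(6).toLong
    // The day count is unbounded in the pattern, so check that multiplication too;
    // the two-digit hour/minute/second fields cannot overflow a Long on their own.
    var micros = Math.multiplyExact(sign * day.toLong, MicrosPerDay)
    micros = Math.addExact(micros, sign * hour.toLong * MicrosPerHour)
    micros = Math.addExact(micros, sign * min.toLong * MicrosPerMinute)
    micros = Math.addExact(micros, sign * sec.toLong * MicrosPerSecond)
    Math.addExact(micros, sign * fracMicros)
  }

  def castStringToDTInterval(input: String): Long = input.trim match {
    case unquoted(sign, d, h, m, s, n) =>
      toMicros(d, h, m, s, n, if (sign == "-") -1L else 1L)
    case literal(outer, inner, d, h, m, s, n) =>
      // Two optional signs may appear, outside and inside the quotes; they multiply,
      // so INTERVAL -'-1 0:0:0' DAY TO SECOND is positive.
      val sign = (if (outer == "-") -1L else 1L) * (if (inner == "-") -1L else 1L)
      toMicros(d, h, m, s, n, sign)
    case other =>
      throw new IllegalArgumentException(
        s"Interval string must match day-time format of `d h:m:s.n` " +
          s"or `INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND`: $other")
  }

  def main(args: Array[String]): Unit = {
    // Expected values taken from the test cases in the patch series above.
    assert(castStringToDTInterval("1 2:03:04") == 93784000000L)
    assert(castStringToDTInterval("INTERVAL '-10 2:03:04' DAY TO SECOND") == -871384000000L)
    assert(castStringToDTInterval("interval -'1 0:0:0.5' day to second") == -86400500000L)
  }
}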