-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-29387][SQL] Support * and / operators for intervals
#26132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
af2e7ab
4227915
379a20a
3e9ed0f
670a7c6
3ae94cf
3bce68e
754109c
166dbd8
b4dc59a
1e2a9a6
a6b6d81
6e569c0
69a3cc7
001d17b
014cde5
049f428
1ca7c89
9e6745a
b428070
8ad4001
91337e5
2bb916f
6ba53f0
719fe6c
d05ffa4
34f6605
00ede6c
690d9c1
5b25432
2265449
e559fb9
35ab9c0
dbc39e8
8244460
b70c0f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -355,6 +355,22 @@ public CalendarInterval subtract(CalendarInterval that) { | |
| return new CalendarInterval(months, microseconds); | ||
| } | ||
|
|
||
| private static CalendarInterval fromDoubles(double months, double microseconds) { | ||
| long roundedMonths = (long)(months); | ||
| // Using 30 days per month as PostgreSQL does. | ||
srowen marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| double microsInMonth = 30 * MICROS_PER_DAY * (months - roundedMonths); | ||
| long roundedMicroseconds = (long)(microseconds + microsInMonth); | ||
| return new CalendarInterval(Math.toIntExact(roundedMonths), roundedMicroseconds); | ||
| } | ||
|
|
||
| public CalendarInterval multiply(double num) { | ||
|
||
| return fromDoubles(num * this.months, num * this.microseconds); | ||
| } | ||
|
|
||
| public CalendarInterval divide(double num) { | ||
| return fromDoubles(this.months / num, this.microseconds / num); | ||
| } | ||
|
|
||
| public CalendarInterval negate() { | ||
| return new CalendarInterval(-this.months, -this.microseconds); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -831,6 +831,8 @@ object TypeCoercion { | |
| * 2. Turns Add/Subtract of TimestampType/DateType/IntegerType | ||
| * and TimestampType/IntegerType/DateType to DateAdd/DateSub/SubtractDates and | ||
| * to SubtractTimestamps. | ||
| * 3. Turns Multiply/Divide of CalendarIntervalType and NumericType | ||
| * to MultiplyInterval/DivideInterval | ||
| */ | ||
| object DateTimeOperations extends Rule[LogicalPlan] { | ||
|
|
||
|
|
@@ -846,6 +848,12 @@ object TypeCoercion { | |
| Cast(TimeAdd(l, r), l.dataType) | ||
| case Subtract(l, r @ CalendarIntervalType()) if acceptedTypes.contains(l.dataType) => | ||
| Cast(TimeSub(l, r), l.dataType) | ||
| case Multiply(l @ CalendarIntervalType(), r @ NumericType()) => | ||
| MultiplyInterval(l, r) | ||
| case Multiply(l @ NumericType(), r @ CalendarIntervalType()) => | ||
| MultiplyInterval(r, l) | ||
| case Divide(l @ CalendarIntervalType(), r @ NumericType()) => | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. postgres=# select interval '1 year' / '365';
?column?
---------------
23:40:16.4064
(1 row)could this be supported?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Taking into account the discussion in #26165, I am not sure. @cloud-fan Should I support this?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can, but this should only apply to literals, not string columns.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we want to support it, please open another PR. |
||
| DivideInterval(l, r) | ||
|
|
||
| case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r) | ||
| case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.catalyst.expressions | ||
|
|
||
| import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} | ||
| import org.apache.spark.sql.types.{AbstractDataType, CalendarIntervalType, DataType, DoubleType} | ||
| import org.apache.spark.unsafe.types.CalendarInterval | ||
|
|
||
| abstract class IntervalNumOperation( | ||
| interval: Expression, | ||
| num: Expression, | ||
| operation: (CalendarInterval, Double) => CalendarInterval, | ||
| operationName: String) | ||
| extends BinaryExpression with ImplicitCastInputTypes with Serializable { | ||
| override def left: Expression = interval | ||
| override def right: Expression = num | ||
|
|
||
| override def inputTypes: Seq[AbstractDataType] = Seq(CalendarIntervalType, DoubleType) | ||
| override def dataType: DataType = CalendarIntervalType | ||
|
|
||
| override def nullable: Boolean = true | ||
|
|
||
| override def nullSafeEval(interval: Any, num: Any): Any = { | ||
| try { | ||
| operation(interval.asInstanceOf[CalendarInterval], num.asInstanceOf[Double]) | ||
| } catch { | ||
| case _: java.lang.ArithmeticException => null | ||
| } | ||
| } | ||
|
|
||
| override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
| nullSafeCodeGen(ctx, ev, (interval, num) => { | ||
| s""" | ||
| try { | ||
| ${ev.value} = $interval.$operationName($num); | ||
| } catch (java.lang.ArithmeticException e) { | ||
| ${ev.isNull} = true; | ||
| } | ||
| """ | ||
| }) | ||
| } | ||
|
|
||
| override def prettyName: String = operationName + "_interval" | ||
| } | ||
|
|
||
| case class MultiplyInterval(interval: Expression, num: Expression) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://github.com/apache/spark/pull/26345/files#diff-b83497f7bc11578a0b63a814a2a30f48R2198-R2207
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be added for only expressions registered as functions. |
||
| extends IntervalNumOperation( | ||
| interval, | ||
| num, | ||
| (i: CalendarInterval, n: Double) => i.multiply(n), | ||
| "multiply") | ||
|
|
||
| case class DivideInterval(interval: Expression, num: Expression) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| extends IntervalNumOperation( | ||
| interval, | ||
| num, | ||
| (i: CalendarInterval, n: Double) => i.divide(n), | ||
| "divide") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.catalyst.expressions | ||
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.unsafe.types.CalendarInterval.fromString | ||
|
|
||
| class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { | ||
| test("multiply") { | ||
| def multiply(interval: String, num: Double): Expression = { | ||
| MultiplyInterval(Literal(fromString(interval)), Literal(num)) | ||
| } | ||
| checkEvaluation(multiply("0 seconds", 10), fromString("0 seconds")) | ||
| checkEvaluation(multiply("10 hours", 0), fromString("0 hours")) | ||
| checkEvaluation(multiply("12 months 1 microseconds", 2), fromString("2 years 2 microseconds")) | ||
| checkEvaluation(multiply("-5 year 3 seconds", 3), fromString("-15 years 9 seconds")) | ||
| checkEvaluation(multiply("1 year 1 second", 0.5), fromString("6 months 500 milliseconds")) | ||
| checkEvaluation( | ||
| multiply("-100 years -1 millisecond", 0.5), | ||
| fromString("-50 years -500 microseconds")) | ||
| checkEvaluation( | ||
| multiply("2 months 4 seconds", -0.5), | ||
| fromString("-1 months -2 seconds")) | ||
| checkEvaluation( | ||
| multiply("1 month 2 microseconds", 1.5), | ||
| fromString("1 months 15 days 3 microseconds")) | ||
| checkEvaluation(multiply("2 months", Int.MaxValue), null) | ||
| } | ||
|
|
||
| test("divide") { | ||
| def divide(interval: String, num: Double): Expression = { | ||
| DivideInterval(Literal(fromString(interval)), Literal(num)) | ||
| } | ||
| checkEvaluation(divide("0 seconds", 10), fromString("0 seconds")) | ||
| checkEvaluation( | ||
| divide("12 months 3 milliseconds", 2), | ||
| fromString("6 months 1 milliseconds 500 microseconds")) | ||
| checkEvaluation( | ||
| divide("-5 year 3 seconds", 3), | ||
| fromString("-1 years -8 months 1 seconds")) | ||
| checkEvaluation( | ||
| divide("6 years -7 seconds", 3), | ||
| fromString("2 years -2 seconds -333 milliseconds -333 microseconds")) | ||
| checkEvaluation( | ||
| divide("2 years -8 seconds", 0.5), | ||
| fromString("4 years -16 seconds")) | ||
| checkEvaluation( | ||
| divide("-1 month 2 microseconds", -0.25), | ||
| fromString("4 months -8 microseconds")) | ||
| checkEvaluation( | ||
| divide("1 month 3 microsecond", 1.5), | ||
| fromString("20 days 2 microseconds")) | ||
| checkEvaluation(divide("2 months", 0), null) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
'truncatedMonths` or something? it's not exactly rounded