Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ object FunctionRegistry {
expression[Month]("month"),
expression[MonthsBetween]("months_between"),
expression[NextDay]("next_day"),
expression[CurrentTimestamp]("now", true),
expression[Now]("now"),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the usage is different between now and current_timestamp, I added a Now expr.

Copy link
Member Author

@maropu maropu Apr 26, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw, don't we need to add a version here for when an alias was added? For example, current_timestamp and now were implemented in different versions, 1.5 and 1.6.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the difference between now and current_timestamp? Should now extend Nondeterministic, for example?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expression[Quarter]("quarter"),
expression[Second]("second"),
expression[ParseToTimestamp]("to_timestamp"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.internal.Logging
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID}
import org.apache.spark.sql.catalyst.expressions.{Attribute, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID, Now}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.streaming.OutputMode

/**
Expand Down Expand Up @@ -412,7 +410,7 @@ object UnsupportedOperationChecker extends Logging {

subPlan.expressions.foreach { e =>
if (e.collectLeaves().exists {
case (_: CurrentTimestamp | _: CurrentDate) => true
case (_: CurrentTimestamp | _: Now | _: CurrentDate) => true
case _ => false
}) {
throwError(s"Continuous processing does not support current time operations.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import org.apache.commons.text.StringEscapeUtils
import org.apache.spark.SparkUpgradeException
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter}
Expand Down Expand Up @@ -62,7 +61,18 @@ trait TimeZoneAwareExpression extends Expression {
* There is no code generation since this expression should get constant folded by the optimizer.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the current date at the start of query evaluation.",
usage = """
_FUNC_() - Returns the current date at the start of query evaluation.

_FUNC_ - Returns the current date at the start of query evaluation.
""",
examples = """
Examples:
> SELECT _FUNC_();
2020-04-25
> SELECT _FUNC_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you remember when we started to support this syntax? I roughly remember it was added relatively recently. If the added version is different, it might be best to note it in note while we're here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found the PR to implement the syntax: #14442
Spark without Hive support has supported it since 2.0. Yeah, I'll add this info as a note.

2020-04-25
""",
group = "datetime_funcs",
since = "1.5.0")
case class CurrentDate(timeZoneId: Option[String] = None)
Expand All @@ -83,26 +93,44 @@ case class CurrentDate(timeZoneId: Option[String] = None)
override def prettyName: String = "current_date"
}

/**
 * Common base for leaf expressions that evaluate to the current timestamp.
 *
 * The result is a non-nullable [[TimestampType]] value, and the expression is marked foldable.
 * Concrete subclasses only need to supply their own `prettyName`.
 */
abstract class CurrentTimestampLike extends LeafExpression with CodegenFallback {
  // Result type and nullability are fixed for every subclass.
  override def dataType: DataType = TimestampType
  override def nullable: Boolean = false

  // Declared foldable even though the value is read from the clock in `eval`.
  override def foldable: Boolean = true

  override def eval(input: InternalRow): Any = currentTimestamp()
}

/**
* Returns the current timestamp at the start of query evaluation.
* All calls of current_timestamp within the same query return the same value.
*
* There is no code generation since this expression should get constant folded by the optimizer.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.",
usage = """
_FUNC_() - Returns the current timestamp at the start of query evaluation.

_FUNC_ - Returns the current timestamp at the start of query evaluation.
""",
examples = """
Examples:
> SELECT _FUNC_();
2020-04-25 15:49:11.914
> SELECT _FUNC_;
2020-04-25 15:49:11.914
""",
group = "datetime_funcs",
since = "1.5.0")
case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
override def foldable: Boolean = true
override def nullable: Boolean = false

override def dataType: DataType = TimestampType

override def eval(input: InternalRow): Any = currentTimestamp()
case class CurrentTimestamp() extends CurrentTimestampLike {
override def prettyName: String = "current_timestamp"
}

override def prettyName: String =
getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_timestamp")
@ExpressionDescription(
usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.",
Copy link
Member

@dongjoon-hyun dongjoon-hyun Apr 26, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Do you think we can have `examples = ...` for now(), since its usage differs from current_timestamp (it has a different class and only the functional form)? Of course, we can skip it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see. I'll add an example for now in this PR, too. Actually, I think it is better to add examples for the expressions that do not have examples now.

group = "datetime_funcs",
since = "1.5.0")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess now should be 3.1.0.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, now has been supported since 1.6.0? https://issues.apache.org/jira/browse/SPARK-11768
Or did you mean above that we should set the version in which this expr was added?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. Sorry, I was confused. Never mind~

// Expression behind the SQL function `now` (registered as `expression[Now]("now")`).
// Evaluation, data type and nullability are inherited from CurrentTimestampLike;
// only the display name is defined here.
case class Now() extends CurrentTimestampLike {
// Name shown for this expression, e.g. in generated column names.
override def prettyName: String = "now"
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1619,7 +1619,11 @@ case class StringSpace(child: Expression)
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.",
usage = """
_FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.

_FUNC_(str FROM pos[ FOR len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.
""",
examples = """
Examples:
> SELECT _FUNC_('Spark SQL', 5);
Expand All @@ -1628,6 +1632,12 @@ case class StringSpace(child: Expression)
SQL
> SELECT _FUNC_('Spark SQL', 5, 1);
k
> SELECT _FUNC_('Spark SQL' FROM 5);
k SQL
> SELECT _FUNC_('Spark SQL' FROM -3);
SQL
> SELECT _FUNC_('Spark SQL' FROM 5 FOR 1);
k
""",
since = "1.5.0")
// scalastyle:on line.size.limit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] {
LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)),
DateType)
})
case CurrentTimestamp() => currentTime
case CurrentTimestamp() | Now() => currentTime
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
val ignoreSet = Set(
// One of examples shows getting the current timestamp
"org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
"org.apache.spark.sql.catalyst.expressions.CurrentDate",
"org.apache.spark.sql.catalyst.expressions.CurrentTimestamp",
"org.apache.spark.sql.catalyst.expressions.Now",
// Random output without a seed
"org.apache.spark.sql.catalyst.expressions.Rand",
"org.apache.spark.sql.catalyst.expressions.Randn",
Expand Down
2 changes: 1 addition & 1 deletion sql/gen-sql-functions-docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _make_pretty_usage(infos):
for info in infos:
# Extracts (signature, description) pairs from `info.usage`, e.g.,
# the signature is `func(expr)` and the description is `...` in a usage `func(expr) - ...`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to update this description accordingly because this PR dropped ( and ) requirement?

usages = iter(re.split(r"(%s\(.*\)) - " % info.name, info.usage.strip())[1:])
usages = iter(re.split(r"(%s.*) - " % info.name, info.usage.strip())[1:])
for (sig, description) in zip(usages, usages):
result.append(" <tr>")
result.append(" <td>%s</td>" % sig)
Expand Down