Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ import org.apache.spark.sql.types._
> SELECT _FUNC_(10.0, array(0.5, 0.4, 0.1), 100);
[10.0,10.0,10.0]
> SELECT _FUNC_(10.0, 0.5, 100);
10.0
10
""",
since = "2.1.0")
case class ApproximatePercentile(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {
examples = """
Examples:
> SELECT _FUNC_(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
1.1135657469022013
1.1135657469022011
> SELECT _FUNC_(col) FROM VALUES (-1000), (-100), (10), (20) AS tab(col);
-1.1135657469022011
""",
Expand All @@ -245,9 +245,9 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) {
examples = """
Examples:
> SELECT _FUNC_(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
-0.7014368047529618
-0.7014368047529627
> SELECT _FUNC_(col) FROM VALUES (1), (10), (100), (10), (1) as tab(col);
0.19432323191698986
0.19432323191699075
""",
since = "1.6.0")
case class Kurtosis(child: Expression) extends CentralMomentAgg(child) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ case class IntegralDivide(left: Expression, right: Expression) extends DivModLik
usage = "expr1 _FUNC_ expr2 - Returns the remainder after `expr1`/`expr2`.",
examples = """
Examples:
> SELECT 2 _FUNC_ 1.8;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ur, I know the intention, but this is a revert of [SPARK-29237][SQL] Prevent real function names in expression example template.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, using the _FUNC_ template is impossible here because the expression has two function names, % and mod, but one of the examples always causes an error:

spark-sql> SELECT 2 mod 1.8;
Error in query: 
extraneous input '1.8' expecting <EOF>(line 1, pos 13)

== SQL ==
SELECT 2 mod 1.8
-------------^^^
spark-sql> SELECT %(2, 1.8);
Error in query: 
no viable alternative at input 'SELECT %'(line 1, pos 7)

== SQL ==
SELECT %(2, 1.8)
-------^^^

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought blacklisting is a better idea for this.
Also, the blacklisting was already in this PR before, wasn't it?

Copy link
Member Author

@MaxGekk MaxGekk Sep 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I probably don't understand you. Do you want to show a non-working example to users?
For MOD, the examples (after replacing _FUNC_ by MOD) are:

    Examples:
      > SELECT 2 MOD 1.8;
       0.2
      > SELECT MOD(2, 1.8);
       0.2

where the first one is incorrect.
For %, both are correct:

    Examples:
      > SELECT 2 % 1.8;
       0.2
      > SELECT MOD(2, 1.8);
       0.2

Using _FUNC_ for the first example, or for the second one, or for both, always produces an incorrect example. For these examples, we cannot use _FUNC_ at all.

> SELECT 2 % 1.8;
0.2
> SELECT MOD(2, 1.8);
0.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,9 +422,9 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends CreateName
examples = """
Examples:
> SELECT _FUNC_('a:1,b:2,c:3', ',', ':');
map("a":"1","b":"2","c":"3")
{"a":"1","b":"2","c":"3"}
> SELECT _FUNC_('a');
map("a":null)
{"a":null}
""")
// scalastyle:on line.size.limit
case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ import org.apache.spark.unsafe.types.UTF8String
examples = """
Examples:
> SELECT _FUNC_('1, 0.8', 'a INT, b DOUBLE');
{"a":1, "b":0.8}
> SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'))
{"a":1,"b":0.8}
> SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
{"time":2015-08-26 00:00:00.0}
""",
since = "3.0.0")
Expand Down Expand Up @@ -199,7 +199,7 @@ case class SchemaOfCsv(
> SELECT _FUNC_(named_struct('a', 1, 'b', 2));
1,2
> SELECT _FUNC_(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy'));
"26/08/2015"
26/08/2015
""",
since = "3.0.0")
// scalastyle:on line.size.limit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
examples = """
Examples:
> SELECT _FUNC_('2016-04-08', 'yyyy-MM-dd');
1460041200
1460098800
""",
since = "1.6.0")
case class ToUnixTimestamp(
Expand Down Expand Up @@ -842,7 +842,7 @@ abstract class UnixTime extends ToTimestamp {
examples = """
Examples:
> SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss');
1970-01-01 00:00:00
1969-12-31 16:00:00
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, surprising.

Copy link
Member Author

@MaxGekk MaxGekk Sep 26, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The yyyy-MM-dd HH:mm:ss pattern does not contain the time zone sub-pattern. If you point out it, you will see something like:

spark-sql> SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ssXXX');
1970-01-01 03:00:00+03:00

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And you can change your current time zone to UTC to see 1970-01-01 00:00:00:

spark-sql> set spark.sql.session.timeZone=UTC;
spark.sql.session.timeZone	UTC
spark-sql> SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ssXXX');
1970-01-01 00:00:00Z

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. The timezone issue will cause failures on machines in different timezones.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but the time zone is forcibly set to "America/Los_Angeles" in tests:

TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))

.createWithDefaultFunction(() => TimeZone.getDefault.getID)

""",
since = "1.5.0")
case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[String] = None)
Expand Down Expand Up @@ -1766,10 +1766,10 @@ case class MakeDate(year: Expression, month: Expression, day: Expression)
> SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
2014-12-28 06:30:45.887
> SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET');
2014-12-28 10:30:45.887
> SELECT _FUNC_(2019, 6, 30, 23, 59, 60)
2014-12-27 21:30:45.887
> SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
2019-07-01 00:00:00
> SELECT _FUNC_(2019, 13, 1, 10, 11, 12, 13);
> SELECT _FUNC_(2019, 13, 1, 10, 11, 12, 'PST');
NULL
> SELECT _FUNC_(null, 7, 22, 15, 30, 0);
NULL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,16 +127,16 @@ case class UserDefinedGenerator(
* 3 NULL
* }}}
*/
// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows. Uses column names col0, col1, etc. by default unless specified otherwise.",
examples = """
Examples:
> SELECT _FUNC_(2, 1, 2, 3);
1 2
3 NULL
1 2
3 NULL
""")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class Stack(children: Seq[Expression]) extends Generator {

private lazy val numRows = children.head.eval().asInstanceOf[Int]
Expand Down Expand Up @@ -375,33 +375,33 @@ case class Explode(child: Expression) extends ExplodeBase {
* 1 20
* }}}
*/
// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions. Unless specified otherwise, uses the column name `pos` for position, `col` for elements of the array or `key` and `value` for elements of the map.",
examples = """
Examples:
> SELECT _FUNC_(array(10,20));
0 10
1 20
0 10
1 20
""")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class PosExplode(child: Expression) extends ExplodeBase {
override val position = true
}

/**
* Explodes an array of structs into a table.
*/
// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = "_FUNC_(expr) - Explodes an array of structs into a table. Uses column names col1, col2, etc. by default unless specified otherwise.",
examples = """
Examples:
> SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b')));
1 a
2 b
1 a
2 b
""")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator {
override val inline: Boolean = true
override val position: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ trait GroupingSet extends Expression with CodegenFallback {
override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
}

// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = """
_FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the specified columns
Expand All @@ -47,19 +47,19 @@ trait GroupingSet extends Expression with CodegenFallback {
examples = """
Examples:
> SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
NULL 2 1
NULL NULL 2
Alice 2 1
Bob 5 1
NULL 5 1
Bob NULL 1
Alice NULL 1
Bob 5 1
Alice 2 1
NULL NULL 2
NULL 5 1
Bob NULL 1
Alice NULL 1
NULL 2 1
""",
since = "2.0.0")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}

// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = """
_FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the specified columns
Expand All @@ -68,21 +68,21 @@ case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
examples = """
Examples:
> SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
NULL NULL 2
Alice 2 1
Bob 5 1
Bob NULL 1
Alice NULL 1
Bob 5 1
Alice 2 1
NULL NULL 2
Bob NULL 1
Alice NULL 1
""",
since = "2.0.0")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}

/**
* Indicates whether a specified column expression in a GROUP BY list is aggregated or not.
* GROUPING returns 1 for aggregated or 0 for not aggregated in the result set.
*/
// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = """
_FUNC_(col) - indicates whether a specified column in a GROUP BY is aggregated or
Expand All @@ -91,12 +91,12 @@ case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
examples = """
Examples:
> SELECT name, _FUNC_(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
Alice 0 2
NULL 1 7
Bob 0 5
Bob 0 5
Alice 0 2
NULL 1 7
""",
since = "2.0.0")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class Grouping(child: Expression) extends Expression with Unevaluable {
@transient
override lazy val references: AttributeSet =
Expand All @@ -111,7 +111,7 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
*
* If groupByExprs is empty, it means all grouping expressions in GroupingSets.
*/
// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = """
_FUNC_([col1[, col2 ..]]) - returns the level of grouping, equals to
Expand All @@ -120,20 +120,20 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
examples = """
Examples:
> SELECT name, _FUNC_(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
NULL 2 2 165.0
Alice 0 2 165.0
NULL 2 5 180.0
NULL 3 7 172.5
Bob 0 5 180.0
Bob 1 5 180.0
Alice 1 2 165.0
NULL 2 5 180.0
Alice 0 2 165.0
NULL 3 7 172.5
NULL 2 2 165.0
Bob 1 5 180.0
Alice 1 2 165.0
Bob 0 5 180.0
""",
note = """
Input columns should match with grouping columns exactly, or empty (means all the grouping
columns).
""",
since = "2.0.0")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable {
@transient
override lazy val references: AttributeSet =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ case class ArrayExists(
> SELECT _FUNC_(array(1, null, 3), x -> x % 2 == 0);
false
> SELECT _FUNC_(array(2, null, 8), x -> x % 2 == 0);
null
NULL
""",
since = "3.0.0")
case class ArrayForAll(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,15 +331,15 @@ case class GetJsonObject(json: Expression, path: Expression)
}
}

// scalastyle:off line.size.limit
// scalastyle:off line.size.limit line.contains.tab
@ExpressionDescription(
usage = "_FUNC_(jsonStr, p1, p2, ..., pn) - Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.",
examples = """
Examples:
> SELECT _FUNC_('{"a":1, "b":2}', 'a', 'b');
1 2
1 2
""")
// scalastyle:on line.size.limit
// scalastyle:on line.size.limit line.contains.tab
case class JsonTuple(children: Seq[Expression])
extends Generator with CodegenFallback {

Expand Down Expand Up @@ -502,9 +502,9 @@ case class JsonTuple(children: Seq[Expression])
examples = """
Examples:
> SELECT _FUNC_('{"a":1, "b":0.8}', 'a INT, b DOUBLE');
{"a":1, "b":0.8}
{"a":1,"b":0.8}
> SELECT _FUNC_('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
{"time":"2015-08-26 00:00:00.0"}
{"time":2015-08-26 00:00:00.0}
""",
since = "2.2.0")
// scalastyle:on line.size.limit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1291,7 +1291,7 @@ abstract class RoundBase(child: Expression, scale: Expression,
examples = """
Examples:
> SELECT _FUNC_(2.5, 0);
3.0
3
""")
// scalastyle:on line.size.limit
case class Round(child: Expression, scale: Expression)
Expand All @@ -1311,7 +1311,7 @@ case class Round(child: Expression, scale: Expression)
examples = """
Examples:
> SELECT _FUNC_(2.5, 0);
2.0
2
""")
// scalastyle:on line.size.limit
case class BRound(child: Expression, scale: Expression)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ abstract class StringRegexExpression extends BinaryExpression
""",
examples = """
Examples:
> SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%'
> SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\Users%';
true
""",
note = """
Expand Down Expand Up @@ -153,6 +153,7 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi
}
}

// scalastyle:off line.contains.tab
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to check tab separators in examples when testing them (I mean, we cannot ignore tabs in tests)? I find it a little annoying to add this line...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to modify the output of the examples and replace tabs with something else? Just in case: we would have to do the same while writing new examples, right? So you could not just copy-paste output from a terminal as is. If someone doesn't know this, they will lose time troubleshooting test failures.

@ExpressionDescription(
usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.",
arguments = """
Expand All @@ -170,18 +171,20 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi
""",
examples = """
Examples:
When spark.sql.parser.escapedStringLiterals is disabled (default).
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*'
> SET spark.sql.parser.escapedStringLiterals=true;
spark.sql.parser.escapedStringLiterals true
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*';
true

When spark.sql.parser.escapedStringLiterals is enabled.
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\Users.*'
> SET spark.sql.parser.escapedStringLiterals=false;
spark.sql.parser.escapedStringLiterals false
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\Users.*';
true
""",
note = """
Use LIKE to match with simple string pattern.
""",
since = "1.0.0")
// scalastyle:on line.contains.tab
case class RLike(left: Expression, right: Expression) extends StringRegexExpression {

override def escape(v: String): String = v
Expand Down
Loading