Skip to content
Closed
Prev Previous commit
Next Next commit
revert it back
  • Loading branch information
gatorsmile committed Jul 9, 2016
commit 028aa79e3c0b57cf887d4d735a387a5043612281
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,6 @@ class Analyzer(
TimeWindowing ::
TypeCoercion.typeCoercionRules ++
extendedResolutionRules : _*),
Batch("LIMIT", Once,
ResolveLimits),
Batch("Nondeterministic", Once,
PullOutNondeterministic),
Batch("UDF", Once,
Expand Down Expand Up @@ -2046,21 +2044,6 @@ object EliminateUnions extends Rule[LogicalPlan] {
}
}

/**
* Converts foldable numeric expressions to integers in [[GlobalLimit]] and [[LocalLimit]] operators
*/
object ResolveLimits extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case g @ GlobalLimit(limitExpr, _) if limitExpr.foldable && isNumeric(limitExpr.eval()) =>
g.copy(limitExpr = Literal(Cast(limitExpr, IntegerType).eval(), IntegerType))
case l @ LocalLimit(limitExpr, _) if limitExpr.foldable && isNumeric(limitExpr.eval()) =>
l.copy(limitExpr = Literal(Cast(limitExpr, IntegerType).eval(), IntegerType))
}

private def isNumeric(value: Any): Boolean =
scala.util.Try(value.asInstanceOf[Number].intValue()).isSuccess
}

/**
* Cleans up unnecessary Aliases inside the plan. Basically we only need Alias as a top level
* expression in Project(project list) or Aggregate(aggregate expressions) or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ trait CheckAnalysis extends PredicateHelper {
if (!limitExpr.foldable) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it must be foldable, can we just use int as limit instead of expression?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you saying we should change the SQL parser? You know, if so, we also need to change the TABLESAMPLE n ROWS.

FYI, even Hive does not support foldable expressions.

hive> select * from t1 limit 1 + 1;
FAILED: ParseException line 1:25 missing EOF at '+' near '1'
hive> select * from t1 limit (1+1);
FAILED: ParseException line 1:23 extraneous input '(' expecting Number near '1'

I found that Impala supports it.
http://www.cloudera.com/documentation/archive/impala/2-x/2-1-x/topics/impala_limit.html

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm saying maybe we can declare Limit as case class GlobalLimit(limit: Int, child: LogicalPlan), but it's not a small change, we could think about it later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uh, I see. Thank you!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this may be more readable:

limitExpr match {
  case e if !e.foldable => fail(...)
  case e if e.dataType != IntegerType => fail("the limit expression must be int type, but got ${e.dataType.simpleString}")
  case e if e.eval() < 0 => fail(...)
  case e =>
}

failAnalysis(
"The argument to the LIMIT clause must evaluate to a constant value. " +
s"Limit:${limitExpr.sql}")
s"Limit:${limitExpr.sql}")
}
// Analyzer rule ResolveLimits already converts limitExpr to integers.
limitExpr match {
case IntegerLiteral(limit) if limit >= 0 => // OK
case IntegerLiteral(limit) => failAnalysis(
s"number_rows in limit clause must be equal to or greater than 0. number_rows:$limit")
case o => failAnalysis(s"""number_rows in limit clause cannot be cast to integer:"$o".""")
limitExpr.eval() match {
case o: Int if o >= 0 => // OK
case o: Int => failAnalysis(
s"number_rows in limit clause must be equal to or greater than 0. number_rows:$o")
case o => failAnalysis(
Copy link
Contributor

@cloud-fan cloud-fan Jul 8, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we allow other integral type? e.g. byte, long, etc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You know, we can do it, but we need to fix the other parts. Let me try it. Thanks!

Copy link
Member Author

@gatorsmile gatorsmile Jul 8, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried to do it in different ways, but the best way I found is to introduce an Analyzer rule.

We have one rule in Optimizer and one rule in Planner for Limit. Both assume the limitExpr is an Integer type. It looks ugly to convert the type everywhere. To support different numeric types, we need to convert these foldable numeric values to int in Analyzer, I think. Let me upload a fix at first. You can judge whether this is a good way or not. Thanks!

s"""number_rows in limit clause cannot be cast to integer:\"$o\".""")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cannot be cast to integer -> must be integer? e.g. byte is castable to int.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryN
}
}
override lazy val statistics: Statistics = {
val limit = limitExpr.eval().asInstanceOf[Number].intValue()
val limit = limitExpr.eval().asInstanceOf[Int]
val sizeInBytes = if (limit == 0) {
// sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero
// (product of children).
Expand All @@ -680,7 +680,7 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNo
}
}
override lazy val statistics: Statistics = {
val limit = limitExpr.eval().asInstanceOf[Number].intValue()
val limit = limitExpr.eval().asInstanceOf[Int]
val sizeInBytes = if (limit == 0) {
// sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero
// (product of children).
Expand Down
20 changes: 0 additions & 20 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -670,26 +670,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
checkAnswer(
sql("SELECT * FROM mapData LIMIT 1"),
mapData.collect().take(1).map(Row.fromTuple).toSeq)

checkAnswer(
sql("SELECT * FROM mapData LIMIT CAST(1 AS Double)"),
mapData.collect().take(1).map(Row.fromTuple).toSeq)

checkAnswer(
sql("SELECT * FROM mapData LIMIT CAST(1 AS BYTE)"),
mapData.collect().take(1).map(Row.fromTuple).toSeq)

checkAnswer(
sql("SELECT * FROM mapData LIMIT CAST(1 AS LONG)"),
mapData.collect().take(1).map(Row.fromTuple).toSeq)

checkAnswer(
sql("SELECT * FROM mapData LIMIT CAST(1 AS SHORT)"),
mapData.collect().take(1).map(Row.fromTuple).toSeq)

checkAnswer(
sql("SELECT * FROM mapData LIMIT CAST(1 AS FLOAT)"),
mapData.collect().take(1).map(Row.fromTuple).toSeq)
}

test("non-foldable expressions in LIMIT") {
Expand Down