Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Revert "[SPARK-29951][SQL] Make the behavior of Postgre dialect indep…
…endent of ansi mode config"

This reverts commit 23b3c4f.
  • Loading branch information
xuanyuanking committed Dec 9, 2019
commit 92e459baaccec2d3d211ae8f6a1042df80b4f270
7 changes: 3 additions & 4 deletions docs/sql-keywords.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@ license: |
limitations under the License.
---

When `spark.sql.dialect=PostgreSQL`, or when the default `spark.sql.dialect=Spark` is kept and `spark.sql.dialect.spark.ansi.enabled` is set to true, Spark SQL uses the ANSI mode parser.
In this mode, Spark SQL has two kinds of keywords:
When `spark.sql.dialect.spark.ansi.enabled` is true, Spark SQL has two kinds of keywords:
* Reserved keywords: Keywords that are reserved and can't be used as identifiers for table, view, column, function, alias, etc.
* Non-reserved keywords: Keywords that have a special meaning only in particular contexts and can be used as identifiers in other contexts. For example, `SELECT 1 WEEK` is an interval literal, but `WEEK` can be used as an identifier in other places.

When the ANSI mode is disabled, Spark SQL has two kinds of keywords:
* Non-reserved keywords: Same definition as the one when the ANSI mode is enabled.
When `spark.sql.dialect.spark.ansi.enabled` is false, Spark SQL has two kinds of keywords:
* Non-reserved keywords: Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`.
* Strict-non-reserved keywords: A strict version of non-reserved keywords, which cannot be used as a table alias.

By default `spark.sql.dialect.spark.ansi.enabled` is false.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ grammar SqlBase;
}

/**
* When true, the behavior of keywords follows ANSI SQL standard.
* When true, ANSI SQL parsing mode is enabled.
*/
public boolean SQL_standard_keyword_behavior = false;
public boolean ansi = false;
}

singleStatement
Expand Down Expand Up @@ -766,7 +766,7 @@ primaryExpression
| qualifiedName '.' ASTERISK #star
| '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
| '(' query ')' #subqueryExpression
| functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
| qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
(OVER windowSpec)? #functionCall
| identifier '->' expression #lambda
| '(' identifier (',' identifier)+ ')' '->' expression #lambda
Expand Down Expand Up @@ -810,7 +810,7 @@ booleanValue

interval
: INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
| {SQL_standard_keyword_behavior}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
| {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
;

errorCapturingMultiUnitsInterval
Expand Down Expand Up @@ -930,12 +930,6 @@ qualifiedNameList
: qualifiedName (',' qualifiedName)*
;

functionName
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is for ANSI-compliant, not pgsql dialect, we should keep it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

: qualifiedName
| LEFT
| RIGHT
;

qualifiedName
: identifier ('.' identifier)*
;
Expand All @@ -955,14 +949,14 @@ errorCapturingIdentifierExtra

identifier
: strictIdentifier
| {!SQL_standard_keyword_behavior}? strictNonReserved
| {!ansi}? strictNonReserved
;

strictIdentifier
: IDENTIFIER #unquotedIdentifier
| quotedIdentifier #quotedIdentifierAlternative
| {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
| {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier
| {ansi}? ansiNonReserved #unquotedIdentifier
| {!ansi}? nonReserved #unquotedIdentifier
;

quotedIdentifier
Expand All @@ -981,7 +975,7 @@ number
| MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
;

// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
// When `spark.sql.dialect.spark.ansi.enabled=true`, there are 2 kinds of keywords in Spark SQL.
// - Reserved keywords:
// Keywords that are reserved and can't be used as identifiers for table, view, column,
// function, alias, etc.
Expand Down Expand Up @@ -1181,9 +1175,9 @@ ansiNonReserved
| YEARS
;

// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL.
// When `spark.sql.dialect.spark.ansi.enabled=false`, there are 2 kinds of keywords in Spark SQL.
// - Non-reserved keywords:
// Same definition as the one when `SQL_standard_keyword_behavior=true`.
// Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`.
// - Strict-non-reserved keywords:
// A strict version of non-reserved keywords, which cannot be used as a table alias.
// You can find the full keywords list by searching "Start of the keywords list" in this file.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
*/
override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) {
// Create the function call.
val name = ctx.functionName.getText
val name = ctx.qualifiedName.getText
val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
val arguments = ctx.argument.asScala.map(expression) match {
case Seq(UnresolvedStar(None))
Expand All @@ -1599,8 +1599,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
case expressions =>
expressions
}
val function = UnresolvedFunction(
getFunctionIdentifier(ctx.functionName), arguments, isDistinct)
val function = UnresolvedFunction(visitFunctionName(ctx.qualifiedName), arguments, isDistinct)

// Check if the function is evaluated in a windowed context.
ctx.windowSpec match {
Expand Down Expand Up @@ -1640,17 +1639,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
}
}

/**
 * Build the [[FunctionIdentifier]] for a parsed function name.
 *
 * If the context carries a qualified name, the identifier is produced by
 * `visitFunctionName`. Otherwise the raw text of the context is used as the
 * function name, with no database part.
 *
 * @param ctx the parsed function-name context
 * @return the identifier of the referenced function
 */
protected def getFunctionIdentifier(ctx: FunctionNameContext): FunctionIdentifier =
  Option(ctx.qualifiedName) match {
    case Some(qualified) => visitFunctionName(qualified)
    // No qualified name present: fall back to the literal token text.
    case None => FunctionIdentifier(ctx.getText, None)
  }

/**
* Create a [[LambdaFunction]].
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.trees.Origin
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.Dialect
import org.apache.spark.sql.types.{DataType, StructType}

/**
Expand Down Expand Up @@ -89,20 +88,13 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log
protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
logDebug(s"Parsing command: $command")

// When we use PostgreSQL dialect or use Spark dialect with setting
// `spark.sql.dialect.spark.ansi.enabled=true`, the parser will use ANSI SQL standard keywords.
val SQLStandardKeywordBehavior = conf.dialect match {
case Dialect.POSTGRESQL => true
case Dialect.SPARK => conf.dialectSparkAnsiEnabled
}

val lexer = new SqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command)))
lexer.removeErrorListeners()
lexer.addErrorListener(ParseErrorListener)
lexer.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
lexer.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled
lexer.legacy_create_hive_table_by_default_enabled = conf.createHiveTableByDefaultEnabled
lexer.SQL_standard_keyword_behavior = SQLStandardKeywordBehavior
lexer.SQL_standard_keyword_behavior = conf.dialectSparkAnsiEnabled

val tokenStream = new CommonTokenStream(lexer)
val parser = new SqlBaseParser(tokenStream)
Expand All @@ -112,7 +104,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log
parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled
parser.legacy_create_hive_table_by_default_enabled = conf.createHiveTableByDefaultEnabled
parser.SQL_standard_keyword_behavior = SQLStandardKeywordBehavior
parser.SQL_standard_keyword_behavior = conf.dialectSparkAnsiEnabled

try {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2555,9 +2555,7 @@ class SQLConf extends Serializable with Logging {

def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE))

def dialect: Dialect.Value = Dialect.withName(getConf(DIALECT))

def usePostgreSQLDialect: Boolean = dialect == Dialect.POSTGRESQL
def usePostgreSQLDialect: Boolean = getConf(DIALECT) == Dialect.POSTGRESQL.toString

def dialectSparkAnsiEnabled: Boolean = getConf(DIALECT_SPARK_ANSI_ENABLED)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ select concat_ws(',',10,20,null,30);
select concat_ws('',10,20,null,30);
select concat_ws(NULL,10,20,null,30) is null;
select reverse('abcde');
-- [SPARK-28036] Built-in udf left/right has inconsistent behavior
-- [SPARK-28479][SPARK-28989] Parser error when enabling ANSI mode
set spark.sql.dialect.spark.ansi.enabled=false;
select i, left('ahoj', i), right('ahoj', i) from range(-5, 6) t(i) order by i;
set spark.sql.dialect.spark.ansi.enabled=true;
-- [SPARK-28037] Add built-in String Functions: quote_literal
-- select quote_literal('');
-- select quote_literal('abc''');
Expand Down
Loading