Revert "[SPARK-29951][SQL] Make the behavior of Postgre dialect independent of ansi mode config"

This reverts commit 23b3c4f.
apache · xuanyuanking · Dec 3, 2019 · Dec 3, 2019 · Dec 3, 2019 · Dec 3, 2019
commit 92e459baaccec2d3d211ae8f6a1042df80b4f270
diff --git a/docs/sql-keywords.md b/docs/sql-keywords.md
@@ -19,13 +19,12 @@ license: |
   limitations under the License.
 ---
 
-When `spark.sql.dialect=PostgreSQL` or keep default `spark.sql.dialect=Spark` with setting `spark.sql.dialect.spark.ansi.enabled` to true, Spark SQL will use the ANSI mode parser.
-In this mode, Spark SQL has two kinds of keywords:
+When `spark.sql.dialect.spark.ansi.enabled` is true, Spark SQL has two kinds of keywords:
 * Reserved keywords: Keywords that are reserved and can't be used as identifiers for table, view, column, function, alias, etc.
 * Non-reserved keywords: Keywords that have a special meaning only in particular contexts and can be used as identifiers in other contexts. For example, `SELECT 1 WEEK` is an interval literal, but WEEK can be used as identifiers in other places.
 
-When the ANSI mode is disabled, Spark SQL has two kinds of keywords:
-* Non-reserved keywords: Same definition as the one when the ANSI mode enabled.
+When `spark.sql.dialect.spark.ansi.enabled` is false, Spark SQL has two kinds of keywords:
+* Non-reserved keywords: Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`.
 * Strict-non-reserved keywords: A strict version of non-reserved keywords, which can not be used as table alias.
 
 By default `spark.sql.dialect.spark.ansi.enabled` is false.

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -58,9 +58,9 @@ grammar SqlBase;
   }
 
   /**
-   * When true, the behavior of keywords follows ANSI SQL standard.
+   * When true, ANSI SQL parsing mode is enabled.
    */
-  public boolean SQL_standard_keyword_behavior = false;
+  public boolean ansi = false;
 }
 
 singleStatement
@@ -766,7 +766,7 @@ primaryExpression
     | qualifiedName '.' ASTERISK                                                               #star
     | '(' namedExpression (',' namedExpression)+ ')'                                           #rowConstructor
     | '(' query ')'                                                                            #subqueryExpression
-    | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
+    | qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
        (OVER windowSpec)?                                                                      #functionCall
     | identifier '->' expression                                                               #lambda
     | '(' identifier (',' identifier)+ ')' '->' expression                                     #lambda
@@ -810,7 +810,7 @@ booleanValue
 
 interval
     : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
-    | {SQL_standard_keyword_behavior}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
+    | {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
     ;
 
 errorCapturingMultiUnitsInterval
@@ -930,12 +930,6 @@ qualifiedNameList
     : qualifiedName (',' qualifiedName)*
     ;
 
-functionName
-    : qualifiedName
-    | LEFT
-    | RIGHT
-    ;
-
 qualifiedName
     : identifier ('.' identifier)*
     ;
@@ -955,14 +949,14 @@ errorCapturingIdentifierExtra
 
 identifier
     : strictIdentifier
-    | {!SQL_standard_keyword_behavior}? strictNonReserved
+    | {!ansi}? strictNonReserved
     ;
 
 strictIdentifier
     : IDENTIFIER              #unquotedIdentifier
     | quotedIdentifier        #quotedIdentifierAlternative
-    | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
-    | {!SQL_standard_keyword_behavior}? nonReserved    #unquotedIdentifier
+    | {ansi}? ansiNonReserved #unquotedIdentifier
+    | {!ansi}? nonReserved    #unquotedIdentifier
     ;
 
 quotedIdentifier
@@ -981,7 +975,7 @@ number
     | MINUS? BIGDECIMAL_LITERAL       #bigDecimalLiteral
     ;
 
-// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
+// When `spark.sql.dialect.spark.ansi.enabled=true`, there are 2 kinds of keywords in Spark SQL.
 // - Reserved keywords:
 //     Keywords that are reserved and can't be used as identifiers for table, view, column,
 //     function, alias, etc.
@@ -1181,9 +1175,9 @@ ansiNonReserved
     | YEARS
     ;
 
-// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL.
+// When `spark.sql.dialect.spark.ansi.enabled=false`, there are 2 kinds of keywords in Spark SQL.
 // - Non-reserved keywords:
-//     Same definition as the one when `SQL_standard_keyword_behavior=true`.
+//     Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`.
 // - Strict-non-reserved keywords:
 //     A strict version of non-reserved keywords, which can not be used as table alias.
 // You can find the full keywords list by searching "Start of the keywords list" in this file.

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1589,7 +1589,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    */
   override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) {
     // Create the function call.
-    val name = ctx.functionName.getText
+    val name = ctx.qualifiedName.getText
     val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
     val arguments = ctx.argument.asScala.map(expression) match {
       case Seq(UnresolvedStar(None))
@@ -1599,8 +1599,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
       case expressions =>
         expressions
     }
-    val function = UnresolvedFunction(
-      getFunctionIdentifier(ctx.functionName), arguments, isDistinct)
+    val function = UnresolvedFunction(visitFunctionName(ctx.qualifiedName), arguments, isDistinct)
 
     // Check if the function is evaluated in a windowed context.
     ctx.windowSpec match {
@@ -1640,17 +1639,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     }
   }
 
-  /**
-   * Get a function identifier consist by database (optional) and name.
-   */
-  protected def getFunctionIdentifier(ctx: FunctionNameContext): FunctionIdentifier = {
-    if (ctx.qualifiedName != null) {
-      visitFunctionName(ctx.qualifiedName)
-    } else {
-      FunctionIdentifier(ctx.getText, None)
-    }
-  }
-
   /**
    * Create an [[LambdaFunction]].
    */

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -28,7 +28,6 @@ import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.trees.Origin
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.SQLConf.Dialect
 import org.apache.spark.sql.types.{DataType, StructType}
 
 /**
@@ -89,20 +88,13 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log
   protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
     logDebug(s"Parsing command: $command")
 
-    // When we use PostgreSQL dialect or use Spark dialect with setting
-    // `spark.sql.dialect.spark.ansi.enabled=true`, the parser will use ANSI SQL standard keywords.
-    val SQLStandardKeywordBehavior = conf.dialect match {
-      case Dialect.POSTGRESQL => true
-      case Dialect.SPARK => conf.dialectSparkAnsiEnabled
-    }
-
     val lexer = new SqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command)))
     lexer.removeErrorListeners()
     lexer.addErrorListener(ParseErrorListener)
     lexer.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
     lexer.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled
     lexer.legacy_create_hive_table_by_default_enabled = conf.createHiveTableByDefaultEnabled
-    lexer.SQL_standard_keyword_behavior = SQLStandardKeywordBehavior
+    lexer.ansi = conf.dialectSparkAnsiEnabled
 
     val tokenStream = new CommonTokenStream(lexer)
     val parser = new SqlBaseParser(tokenStream)
@@ -112,7 +104,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log
     parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
     parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled
     parser.legacy_create_hive_table_by_default_enabled = conf.createHiveTableByDefaultEnabled
-    parser.SQL_standard_keyword_behavior = SQLStandardKeywordBehavior
+    parser.ansi = conf.dialectSparkAnsiEnabled
 
     try {
       try {

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2555,9 +2555,7 @@ class SQLConf extends Serializable with Logging {
 
   def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE))
 
-  def dialect: Dialect.Value = Dialect.withName(getConf(DIALECT))
-
-  def usePostgreSQLDialect: Boolean = dialect == Dialect.POSTGRESQL
+  def usePostgreSQLDialect: Boolean = getConf(DIALECT) == Dialect.POSTGRESQL.toString
 
   def dialectSparkAnsiEnabled: Boolean = getConf(DIALECT_SPARK_ANSI_ENABLED)
 

diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql
@@ -44,7 +44,11 @@ select concat_ws(',',10,20,null,30);
 select concat_ws('',10,20,null,30);
 select concat_ws(NULL,10,20,null,30) is null;
 select reverse('abcde');
+-- [SPARK-28036] Built-in udf left/right has inconsistent behavior
+-- [SPARK-28479][SPARK-28989] Parser error when enabling ANSI mode
+set spark.sql.dialect.spark.ansi.enabled=false;
 select i, left('ahoj', i), right('ahoj', i) from range(-5, 6) t(i) order by i;
+set spark.sql.dialect.spark.ansi.enabled=true;
 -- [SPARK-28037] Add built-in String Functions: quote_literal
 -- select quote_literal('');
 -- select quote_literal('abc''');