[SPARK-34556][SQL] Checking duplicate static partition columns should…

… respect case sensitive conf. This PR makes partition spec parsing respect the case-sensitivity conf. When parsing a partition spec, Spark calls `org.apache.spark.sql.catalyst.parser.ParserUtils.checkDuplicateKeys` to check whether the list contains duplicate partition column names. However, this method is always case sensitive, so it does not detect duplicate partition column names that differ only in case. Yes, this is a user-facing change: it prevents users from writing incorrect queries such as `INSERT OVERWRITE t PARTITION (c='2', C='3') VALUES (1)` when they have not enabled the case-sensitive conf. The newly added test fails without this change. Closes #31669 from zsxwing/SPARK-34556. Authored-by: Shixiong Zhu <[email protected]> Signed-off-by: HyukjinKwon <[email protected]>
apache · zsxwing · Mar 1, 2021 · Mar 3, 2021 · bf244b60169bcf7a75342104a8f2aa822c30087f
commit bf244b60169bcf7a75342104a8f2aa822c30087f
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -500,7 +500,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     // Before calling `toMap`, we check duplicated keys to avoid silently ignore partition values
     // in partition spec like PARTITION(a='1', b='2', a='3'). The real semantical check for
     // partition columns will be done in analyzer.
-    checkDuplicateKeys(parts, ctx)
+    if (conf.caseSensitiveAnalysis) {
+      checkDuplicateKeys(parts, ctx)
+    } else {
+      checkDuplicateKeys(parts.map(kv => kv._1.toLowerCase(Locale.ROOT) -> kv._2), ctx)
+    }
     parts.toMap
   }
 

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -353,6 +353,17 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession {
     assert(e.contains("Found duplicate keys 'key1'"))
   }
 
+  test("SPARK-34556: duplicate keys in partition spec") {
+    val e = intercept[ParseException] {
+      parser.parsePlan("INSERT OVERWRITE t PARTITION (c='2', C='3') VALUES (1)")
+    }.getMessage
+    assert(e.contains("Found duplicate keys 'c'"))
+    val conf = new SQLConf()
+    conf.setConf(SQLConf.CASE_SENSITIVE, true)
+    val caseSensitiveParser = new SparkSqlParser(conf)
+    caseSensitiveParser.parsePlan("INSERT OVERWRITE t PARTITION (c='2', C='3') VALUES (1)")
+  }
+
   test("duplicate columns in partition specs") {
     val e = intercept[ParseException] {
       parser.parsePlan(