Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1e647ee
Share code to check column name duplication
maropu Apr 25, 2017
4467077
Apply reviews
maropu Jun 13, 2017
33ab217
Make code more consistent
maropu Jun 13, 2017
d8efb9d
Apply review comments
maropu Jun 16, 2017
11d1818
Apply xiao's reviews
maropu Jun 16, 2017
22e1e4f
Apply more xiao's reviews
maropu Jun 17, 2017
743a069
Replace map with foreach
maropu Jun 20, 2017
f6eab2d
Add tests for data schema + partition schema
maropu Jun 20, 2017
09da8d6
Drop name duplication checks in HiveMetastoreCatalog.scala
maropu Jun 20, 2017
6d03f31
Modify exception messages
maropu Jun 20, 2017
a0b9b05
Revert logic to check name duplication
maropu Jun 20, 2017
91b6424
Add tests for write paths
maropu Jun 21, 2017
37ad3f3
Add tests for stream sink paths
maropu Jun 21, 2017
d0d9d3e
Brushes up code and adds more tests
maropu Jun 25, 2017
cbe9c71
Apply reviews
maropu Jun 26, 2017
c69270f
Apply more comments
maropu Jun 27, 2017
af959f6
Add more tests in create.sql
maropu Jun 27, 2017
8d3e10a
Move duplication checks in constructor
maropu Jun 29, 2017
9b386d5
Brush up code
maropu Jun 30, 2017
a878510
[WIP] Add DataSourceValidator trait to validate schema in write path
maropu Jul 3, 2017
be20127
Revert "Brush up code"
maropu Jul 3, 2017
f41bf80
Fix more issues
maropu Jul 4, 2017
0526391
Revert DataSourceValidator
maropu Jul 4, 2017
9e199bc
Add the check for external relation providers
maropu Jul 4, 2017
1ae132d
[WIP] Handle DataSource name duplication in one place
maropu Jul 5, 2017
5c29a75
Fix more
maropu Jul 6, 2017
5ed2c0d
Move some tests to DDLSuite
maropu Jul 7, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Apply reviews
  • Loading branch information
maropu committed Jul 6, 2017
commit cbe9c71b3c6d131f698e9ab580122120dd98b626
Original file line number Diff line number Diff line change
Expand Up @@ -24,82 +24,60 @@ import org.apache.spark.sql.types.StructType

class SchemaUtilsSuite extends SparkFunSuite {

test("Check column name duplication in case-sensitive cases") {
def checkCaseSensitiveExceptionCases(schemaStr: String, duplicatedColumns: String): Unit = {
val expectedErrorMsg = s"Found duplicate column(s) in SchemaUtilsSuite: $duplicatedColumns"
val schema = StructType.fromDDL(schemaStr)
var msg = intercept[AnalysisException] {
SchemaUtils.checkSchemaColumnNameDuplication(
schema, "in SchemaUtilsSuite", caseSensitiveAnalysis = true)
}.getMessage
assert(msg.contains(expectedErrorMsg))
msg = intercept[AnalysisException] {
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveResolution)
}.getMessage
assert(msg.contains(expectedErrorMsg))
msg = intercept[AnalysisException] {
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveAnalysis = true)
}.getMessage
assert(msg.contains(expectedErrorMsg))
private def resolver(caseSensitiveAnalysis: Boolean): Resolver = {
if (caseSensitiveAnalysis) {
caseSensitiveResolution
} else {
caseInsensitiveResolution
}
}

checkCaseSensitiveExceptionCases("a INT, b INT, a INT", "`a`")
checkCaseSensitiveExceptionCases("a INT, b INT, a INT, a INT", "`a`")
checkCaseSensitiveExceptionCases("a INT, b INT, a INT, b INT", "`b`, `a`")
checkCaseSensitiveExceptionCases("a INT, c INT, b INT, a INT, b INT, c INT", "`b`, `a`, `c`")
Seq((true, ("a", "a"), ("b", "b")), (false, ("a", "A"), ("b", "B"))).foreach {
case (caseSensitive, (a0, a1), (b0, b1)) =>

// Check no exception thrown
def checkCaseSensitiveNoExceptionCases(schemaStr: String): Unit = {
val schema = StructType.fromDDL(schemaStr)
SchemaUtils.checkSchemaColumnNameDuplication(
schema, "in SchemaUtilsSuite", caseSensitiveAnalysis = true)
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveResolution)
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveAnalysis = true)
}
val testType = if (caseSensitive) "case-sensitive" else "case-insensitive"
test(s"Check column name duplication in $testType cases") {
def checkExceptionCases(schemaStr: String, duplicatedColumns: Seq[String]): Unit = {
val expectedErrorMsg = "Found duplicate column(s) in SchemaUtilsSuite: " +
duplicatedColumns.map(c => s"`${c.toLowerCase}`").mkString(", ")
val schema = StructType.fromDDL(schemaStr)
var msg = intercept[AnalysisException] {
SchemaUtils.checkSchemaColumnNameDuplication(
schema, "in SchemaUtilsSuite", caseSensitiveAnalysis = caseSensitive)
}.getMessage
assert(msg.contains(expectedErrorMsg))
msg = intercept[AnalysisException] {
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", resolver(caseSensitive))
}.getMessage
assert(msg.contains(expectedErrorMsg))
msg = intercept[AnalysisException] {
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveAnalysis = caseSensitive)
}.getMessage
assert(msg.contains(expectedErrorMsg))
}

checkCaseSensitiveNoExceptionCases("a INT, b INT, c INT")
checkCaseSensitiveNoExceptionCases("Aa INT, b INT, aA INT")
checkExceptionCases(s"$a0 INT, b INT, $a1 INT", a0 :: Nil)
checkExceptionCases(s"$a0 INT, b INT, $a1 INT, $a0 INT", a0 :: Nil)
checkExceptionCases(s"$a0 INT, $b0 INT, $a1 INT, $a0 INT, $b1 INT", b0 :: a0 :: Nil)
}
}

test("Check column name duplication in case-insensitive cases") {
def checkCaseInsensitiveExceptionCases(schemaStr: String, duplicatedColumns: String): Unit = {
val expectedErrorMsg = s"Found duplicate column(s) in SchemaUtilsSuite: $duplicatedColumns"
test("Check no exception thrown for valid schemas") {
def checkNoExceptionCases(schemaStr: String, caseSensitive: Boolean): Unit = {
val schema = StructType.fromDDL(schemaStr)
var msg = intercept[AnalysisException] {
SchemaUtils.checkSchemaColumnNameDuplication(
schema, "in SchemaUtilsSuite", caseSensitiveAnalysis = false)
}.getMessage
assert(msg.contains(expectedErrorMsg))
msg = intercept[AnalysisException] {
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseInsensitiveResolution)
}.getMessage
assert(msg.contains(expectedErrorMsg))
msg = intercept[AnalysisException] {
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveAnalysis = false)
}.getMessage
assert(msg.contains(expectedErrorMsg))
SchemaUtils.checkSchemaColumnNameDuplication(
schema, "in SchemaUtilsSuite", caseSensitiveAnalysis = caseSensitive)
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", resolver(caseSensitive))
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveAnalysis = caseSensitive)
}

checkCaseInsensitiveExceptionCases("Aa INT, b INT, Aa INT", "`aa`")
checkCaseInsensitiveExceptionCases("a INT, bB INT, Bb INT", "`bb`")
checkCaseInsensitiveExceptionCases("Aa INT, b INT, Aa INT, c INT, aa INT", "`aa`")
checkCaseInsensitiveExceptionCases("Aa INT, bB INT, Bb INT, aa INT", "`bb`, `aa`")
checkCaseInsensitiveExceptionCases(
"Aa INT, cc INT, bB INT, cC INT, Bb INT, aa INT", "`bb`, `cc`, `aa`")
checkNoExceptionCases("a INT, b INT, c INT", caseSensitive = true)
checkNoExceptionCases("Aa INT, b INT, aA INT", caseSensitive = true)

// Check no exception thrown
val schema = StructType.fromDDL("a INT, b INT, c INT")
SchemaUtils.checkSchemaColumnNameDuplication(
schema, "in SchemaUtilsSuite", caseSensitiveAnalysis = false)
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseInsensitiveResolution)
SchemaUtils.checkColumnNameDuplication(
schema.map(_.name), "in SchemaUtilsSuite", caseSensitiveAnalysis = false)
checkNoExceptionCases("a INT, b INT, c INT", caseSensitive = false)
}
}
4 changes: 2 additions & 2 deletions sql/core/src/test/resources/sql-tests/results/create.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ CREATE TABLE t (c0 STRING, c1 INT, c1 DOUBLE, c0 INT) USING parquet
struct<>
-- !query 1 output
org.apache.spark.sql.AnalysisException
Found duplicate column(s) in the table definition of `t`: `c0`, `c1`;
Found duplicate column(s) in the table definition of `t`: `c1`, `c0`;


-- !query 2
Expand All @@ -33,7 +33,7 @@ CREATE TABLE t (c0 STRING, c1 INT, c1 DOUBLE, c0 INT) USING parquet
struct<>
-- !query 3 output
org.apache.spark.sql.AnalysisException
Found duplicate column(s) in the table definition of `t`: `c0`, `c1`;
Found duplicate column(s) in the table definition of `t`: `c1`, `c0`;


-- !query 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,6 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
df.toDF().write.mode("overwrite").text(testDir.getAbsolutePath)
val e = intercept[AnalysisException] {
spark.read.format(format).option("inferSchema", true).load(testDir.getAbsolutePath)

}
assert(e.getMessage.contains("Found duplicate column(s) in the data schema:"))
}
Expand Down