-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-24681][SQL] Verify nested column names in Hive metastore #21711
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -138,17 +138,35 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat | |
| } | ||
|
|
||
| /** | ||
| * Checks the validity of data column names. Hive metastore disallows the table to use comma in | ||
| * data column names. Partition columns do not have such a restriction. Views do not have such | ||
| * a restriction. | ||
| * Checks the validity of data column names. Hive metastore does not allow commas in top-level | ||
| * data column names, nor any of ',', ':', and ';' in the names of fields nested inside struct | ||
| * columns. Partition columns and views do not have these restrictions. | ||
| */ | ||
| private def verifyDataSchema( | ||
| tableName: TableIdentifier, tableType: CatalogTableType, dataSchema: StructType): Unit = { | ||
| if (tableType != VIEW) { | ||
| dataSchema.map(_.name).foreach { colName => | ||
| if (colName.contains(",")) { | ||
| throw new AnalysisException("Cannot create a table having a column whose name contains " + | ||
| s"commas in Hive metastore. Table: $tableName; Column: $colName") | ||
| val invalidChars = Seq(",", ":", ";") | ||
| def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { f => | ||
| f.dataType match { | ||
| case st: StructType => verifyNestedColumnNames(st) | ||
| case _ if invalidChars.exists(f.name.contains) => | ||
| throw new AnalysisException("Cannot create a table having a nested column whose name " + | ||
| s"contains invalid characters (${invalidChars.map(c => s"'$c'").mkString(", ")}) " + | ||
|
||
| s"in Hive metastore. Table: $tableName; Column: ${f.name}") | ||
| case _ => | ||
| } | ||
| } | ||
|
|
||
| dataSchema.foreach { f => | ||
| f.dataType match { | ||
| // Checks top-level column names | ||
| case _ if f.name.contains(",") => | ||
| throw new AnalysisException("Cannot create a table having a column whose name " + | ||
| s"contains commas in Hive metastore. Table: $tableName; Column: ${f.name}") | ||
| // Checks nested column names | ||
| case st: StructType => | ||
| verifyNestedColumnNames(st) | ||
| case _ => | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2005,6 +2005,24 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { | |
| } | ||
| } | ||
|
|
||
| test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") { | ||
|
||
| val expectedMsg = "Cannot create a table having a nested column whose name contains invalid " + | ||
| "characters (',', ':', ';') in Hive metastore." | ||
|
|
||
| Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName => | ||
| withTable("t") { | ||
| val e = intercept[AnalysisException] { | ||
| spark.range(1) | ||
| .select(struct(lit(0).as(nestedColumnName)).as("toplevel")) | ||
| .write | ||
| .format("hive") | ||
| .saveAsTable("t") | ||
| }.getMessage | ||
| assert(e.contains(expectedMsg)) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-19912 String literals should be escaped for Hive metastore partition pruning") { | ||
| withTable("spark_19912") { | ||
| Seq( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok