[SPARK-22546][SQL] Supporting for changing column dataType #19773
Changes from 3 commits
@@ -309,7 +309,7 @@ case class AlterTableChangeColumnCommand(
     columnName: String,
     newColumn: StructField) extends RunnableCommand {

-  // TODO: support change column name/dataType/metadata/position.
+  // TODO: support change column name/metadata/position.
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val catalog = sparkSession.sessionState.catalog
     val table = catalog.getTableMetadata(tableName)

@@ -318,18 +318,34 @@ case class AlterTableChangeColumnCommand(
     // Find the origin column from dataSchema by column name.
     val originColumn = findColumnByName(table.dataSchema, columnName, resolver)
-    // Throw an AnalysisException if the column name/dataType is changed.
+    // Throw an AnalysisException if the column name is changed.
     if (!columnEqual(originColumn, newColumn, resolver)) {
       throw new AnalysisException(
         "ALTER TABLE CHANGE COLUMN is not supported for changing column " +
           s"'${originColumn.name}' with type '${originColumn.dataType}' to " +
           s"'${newColumn.name}' with type '${newColumn.dataType}'")
Member: Can you update this error message?

Author: After adding the type check, maybe we also need the type information in the error message.
     }

+    val typeChanged = originColumn.dataType != newColumn.dataType
+    val partitionColumnChanged = table.partitionColumnNames.contains(originColumn.name)
+
+    // Throw an AnalysisException if the type of partition column is changed.
+    if (typeChanged && partitionColumnChanged) {
+      throw new AnalysisException(
+        "ALTER TABLE CHANGE COLUMN is not supported for changing partition column" +
+          s"'${originColumn.name}' with type '${originColumn.dataType}' to " +
+          s"'${newColumn.name}' with type '${newColumn.dataType}'")
+    }
+
     val newDataSchema = table.dataSchema.fields.map { field =>
       if (field.name == originColumn.name) {
-        // Create a new column from the origin column with the new comment.
-        addComment(field, newColumn.getComment)
+        // Add the comment to a column, if comment is empty, return the original column.
+        val newField = newColumn.getComment.map(field.withComment(_)).getOrElse(field)
+        if (typeChanged) {
+          newField.copy(dataType = newColumn.dataType)
+        } else {
+          newField
+        }
       } else {
         field
       }

@@ -350,16 +366,11 @@ case class AlterTableChangeColumnCommand(
         s"${schema.fieldNames.mkString("[`", "`, `", "`]")}"))
     }

-  // Add the comment to a column, if comment is empty, return the original column.
-  private def addComment(column: StructField, comment: Option[String]): StructField = {
-    comment.map(column.withComment(_)).getOrElse(column)
-  }
Member: What happens if we need data conversion (e.g., from int to double) in binary formats (Parquet and ORC)? Also, what happens if we get incompatible type changes?

Author: Thanks for the advice, I should also check type compatibility; added in ef65c4d.

Member: We probably need to comply with the Hive behaviour. Is the current fix (by casting) the same as Hive's?

Author: Thanks for your question; that is also what I considered while doing the compatibility check. Hive handles column type changes in HiveAlterHandler, and the detailed compatibility check is in ColumnType. You can see that the ColumnType check actually uses the

Member: Ah, ok. Thanks for the check. By the way, have you checked whether this could work correctly?

Member: In my opinion, in this PR we need additional logic to cast input data to the changed type recorded in the catalog when reading.

Author: Thanks for your advice! Adding logic to cast input data to the changed catalog type may require modifying four code paths, including the vectorized and row readers for Parquet and ORC. If we don't agree with the current behavior, I'll keep following up on these.

Member: Thanks for the check! I think we don't always need to comply with the Hive behaviour; a behaviour that is understandable for users is the best.

Member: Thank you for pinging me, @maropu.
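A minimal sketch of the read-side casting idea discussed above, expressed at the DataFrame level rather than inside the Parquet/ORC vectorized and row readers the author mentions. The helper name `castToCatalogSchema` and the file path are illustrative assumptions, not part of this PR:

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.StructType

// Hypothetical helper: cast every column of `df` to the type recorded in the
// catalog schema; columns whose type already matches are unaffected by the cast.
def castToCatalogSchema(df: DataFrame, catalogSchema: StructType): DataFrame = {
  val projections = catalogSchema.map { field =>
    col(field.name).cast(field.dataType).as(field.name)
  }
  df.select(projections: _*)
}

// Usage sketch: read the files with their on-disk schema, then cast to the
// (possibly changed) schema stored in the catalog.
// val raw = spark.read.parquet("/warehouse/dbx.db/tab1")
// val casted = castToCatalogSchema(raw, catalogSchema)
```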
   // Compare a [[StructField]] to another, return true if they have the same column
   // name(by resolver) and dataType.
   private def columnEqual(
       field: StructField, other: StructField, resolver: Resolver): Boolean = {
-    resolver(field.name, other.name) && field.dataType == other.dataType
+    resolver(field.name, other.name)
   }
 }
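The heart of the new `run` logic is a plain map over the table's `dataSchema` that swaps in the new data type for the matched field and leaves everything else alone. A standalone sketch of that transformation follows; the column names and types here are illustrative, not taken from the PR:

```scala
import org.apache.spark.sql.types._

// Illustrative schema and requested change.
val dataSchema = StructType(Seq(
  StructField("col1", IntegerType),
  StructField("col2", DoubleType)))
val newColumn = StructField("col1", StringType)

// Mirrors the command's update: replace the matched field's dataType,
// keep every other field as-is.
val newDataSchema = dataSchema.fields.map { field =>
  if (field.name == newColumn.name) field.copy(dataType = newColumn.dataType)
  else field
}

// The catalog would then be updated with the rebuilt schema.
println(StructType(newDataSchema).simpleString)  // struct<col1:string,col2:double>
```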
@@ -1697,6 +1697,16 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 col1 INT COMMENT 'this is col1'")
     assert(getMetadata("col1").getString("key") == "value")
     assert(getMetadata("col1").getString("comment") == "this is col1")
+
+    // Ensure that changing column type takes effect
+    sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 col1 STRING")
+    val column = catalog.getTableMetadata(tableIdent).schema.fields.find(_.name == "col1")
+    assert(column.get.dataType == StringType)
+
+    // Ensure that changing partition column type throw exception
+    intercept[AnalysisException] {
+      sql("ALTER TABLE dbx.tab1 CHANGE COLUMN a a STRING")
+    }
Member: Please compare the error message.

Author: Thanks, done in ef65c4d. Also added a check for type compatibility.
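As suggested, the test could capture the exception and assert on its message rather than only its type. A sketch, assuming the partition-column error text shown in the command diff above survives into the final commit:

```scala
// Check the message text, not just the exception type.
val e = intercept[AnalysisException] {
  sql("ALTER TABLE dbx.tab1 CHANGE COLUMN a a STRING")
}
assert(e.getMessage.contains(
  "ALTER TABLE CHANGE COLUMN is not supported for changing partition column"))
```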
   }

   test("drop build-in function") {
Member: Is it ok to check names only?

Author: Thanks, not enough yet; added a type compatibility check in ef65c4d.
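For reference, one way such a type-compatibility check could look. Using Catalyst's `Cast.canCast` as the rule is an assumption for illustration only, not a claim about what ef65c4d actually does:

```scala
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types.StructField

// Assumed rule: only allow type changes that Catalyst knows how to cast.
def verifyTypeCompatible(originColumn: StructField, newColumn: StructField): Unit = {
  if (originColumn.dataType != newColumn.dataType &&
      !Cast.canCast(originColumn.dataType, newColumn.dataType)) {
    throw new AnalysisException(
      s"ALTER TABLE CHANGE COLUMN cannot change column '${originColumn.name}' from type " +
        s"'${originColumn.dataType}' to incompatible type '${newColumn.dataType}'")
  }
}
```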