[SPARK-24204][SQL] Verify a schema in Json/Orc/ParquetFileFormat #21389
Closed
Changes from 1 commit (12 commits in total)
315c7e8  Fix (maropu)
731c873  Fix (maropu)
cf4cf2f  Fix (maropu)
3158e00  Fix (maropu)
0b25c4d  Merge with CSV.verifySChema (maropu)
927497d  Fix (maropu)
589479d  Add tests for CSV (maropu)
df1a67f  Keep backward-compatibility (maropu)
6303e49  Brush up code (maropu)
6301fb4  Spit test cases into pieces (maropu)
50e7b11  Review applied (maropu)
1cfc7b0  Fix (maropu)
Commit df1a67f8409c0c81016f5ae9adfe608f0a988273: Keep backward-compatibility
FileBasedDataSourceSuite.scala
@@ -207,8 +207,8 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext with Befo
   // Unsupported data types of csv, json, orc, and parquet are as follows;
   // csv -> R/W: Interval, Null, Array, Map, Struct
-  // json -> R/W: Interval
-  // orc -> R/W: Interval, Null
+  // json -> W: Interval
+  // orc -> W: Interval, Null
   // parquet -> R/W: Interval, Null
   test("SPARK-24204 error handling for unsupported data types") {
     withTempDir { dir =>
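Seen from the user side, the write half of that matrix means an interval column cannot be persisted by any of these formats. A minimal spark-shell style sketch of that behavior (the `spark` session is the one spark-shell provides; the output path is an illustrative assumption, not part of the patch):

```scala
import org.apache.spark.sql.AnalysisException

// Illustrative output path, not part of the patch.
val out = "/tmp/spark-24204-interval-write"

// Writing a CalendarInterval column is rejected before any files are written,
// for json, orc, parquet and csv alike.
try {
  spark.sql("select interval 1 days").write.format("json").mode("overwrite").save(out)
} catch {
  case e: AnalysisException =>
    // The same message the test above asserts on.
    println(e.getMessage) // Cannot save interval data type into external storage.
}
```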
@@ -267,7 +267,39 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext with Befo
     withTempDir { dir =>
       val tempDir = new File(dir, "files").getCanonicalPath

-      Seq("parquet", "orc", "json", "csv").foreach { format =>
+      Seq("orc", "json").foreach { format =>
         // write path
         var msg = intercept[AnalysisException] {
           sql("select interval 1 days").write.format(format).mode("overwrite").save(tempDir)
         }.getMessage
         assert(msg.contains("Cannot save interval data type into external storage."))

         msg = intercept[UnsupportedOperationException] {
           spark.udf.register("testType", () => new IntervalData())
           sql("select testType()").write.format(format).mode("overwrite").save(tempDir)
         }.getMessage
         assert(msg.toLowerCase(Locale.ROOT)
           .contains(s"$format data source does not support calendarinterval data type."))

         // read path
         // We expect the types below should be passed for backward-compatibility

         // Interval type
         var schema = StructType(StructField("a", CalendarIntervalType, true) :: Nil)
         spark.range(1).write.format(format).mode("overwrite").save(tempDir)
         spark.read.schema(schema).format(format).load(tempDir).collect()

         // UDT having interval data
         schema = StructType(StructField("a", new IntervalUDT(), true) :: Nil)
         spark.range(1).write.format(format).mode("overwrite").save(tempDir)
         spark.read.schema(schema).format(format).load(tempDir).collect()
       }
     }

     withTempDir { dir =>
       val tempDir = new File(dir, "files").getCanonicalPath

       Seq("parquet", "csv").foreach { format =>
         // write path
         var msg = intercept[AnalysisException] {
           sql("select interval 1 days").write.format(format).mode("overwrite").save(tempDir)
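The read-path half of the orc/json block above is the backward-compatibility piece: a user-specified schema that declares an interval column is still accepted when loading files. A small spark-shell style sketch of the same pattern (the path is an illustrative assumption):

```scala
import org.apache.spark.sql.types.{CalendarIntervalType, StructField, StructType}

// Illustrative output path, not part of the patch.
val out = "/tmp/spark-24204-interval-read"

// Write an ordinary long column, then load it back while declaring an interval column.
// With this change, orc/json verify the schema only on the write path, so the read still
// succeeds instead of being rejected up front.
spark.range(1).write.format("orc").mode("overwrite").save(out)

val schema = StructType(StructField("a", CalendarIntervalType, nullable = true) :: Nil)
spark.read.schema(schema).format("orc").load(out).collect()
```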
@@ -299,7 +331,36 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext with Befo
           .contains(s"$format data source does not support calendarinterval data type."))
       }

-      Seq("parquet", "orc", "csv").foreach { format =>
+      Seq("orc").foreach { format =>
         // write path
         var msg = intercept[UnsupportedOperationException] {
           sql("select null").write.format(format).mode("overwrite").save(tempDir)
         }.getMessage
         assert(msg.toLowerCase(Locale.ROOT)
           .contains(s"$format data source does not support null data type."))

         msg = intercept[UnsupportedOperationException] {
           spark.udf.register("testType", () => new NullData())
           sql("select testType()").write.format(format).mode("overwrite").save(tempDir)
         }.getMessage
         assert(msg.toLowerCase(Locale.ROOT)
           .contains(s"$format data source does not support null data type."))

         // read path
         // We expect the types below should be passed for backward-compatibility

         // Null type
         var schema = StructType(StructField("a", NullType, true) :: Nil)
         spark.range(1).write.format(format).mode("overwrite").save(tempDir)
         spark.read.schema(schema).format(format).load(tempDir).collect()

         // UDT having null data
         schema = StructType(StructField("a", new NullUDT(), true) :: Nil)
         spark.range(1).write.format(format).mode("overwrite").save(tempDir)
         spark.read.schema(schema).format(format).load(tempDir).collect()
       }

       Seq("parquet", "csv").foreach { format =>
         // write path
         var msg = intercept[UnsupportedOperationException] {
           sql("select null").write.format(format).mode("overwrite").save(tempDir)
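Taken together, the tests pin down an asymmetric contract: unsupported types fail fast on the write path, while read-side schemas are left alone for compatibility. Below is a rough sketch of that contract in terms of Spark's public type API; the object and method names are illustrative stand-ins, not the actual Json/Orc/ParquetFileFormat internals, and only the error-message shape is borrowed from the assertions above.

```scala
import org.apache.spark.sql.types._

// Illustrative stand-in for the per-format verification this PR wires in; the real Spark
// code lives in the individual FileFormat implementations and uses different names.
object SchemaVerificationSketch {

  // Types a format such as ORC cannot physically persist (per the matrix in the test).
  private def supportsForWrite(dt: DataType): Boolean = dt match {
    case CalendarIntervalType | NullType => false
    case StructType(fields)              => fields.forall(f => supportsForWrite(f.dataType))
    case ArrayType(elementType, _)       => supportsForWrite(elementType)
    case MapType(keyType, valueType, _)  => supportsForWrite(keyType) && supportsForWrite(valueType)
    case _                               => true
  }

  // Called on the write path only. The read path deliberately skips the check, so
  // user-specified schemas containing interval/null keep working (the backward
  // compatibility exercised by the tests above).
  def verifyWriteSchema(format: String, schema: StructType): Unit = {
    schema.fields.foreach { field =>
      if (!supportsForWrite(field.dataType)) {
        throw new UnsupportedOperationException(
          s"$format data source does not support ${field.dataType.catalogString} data type.")
      }
    }
  }
}
```

With that split, `verifyWriteSchema("orc", new StructType().add("a", CalendarIntervalType))` throws, while nothing prevents attaching the same schema on a read.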
Review comment: Write a comment above this?
Reply: ok