Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Address comments.
  • Loading branch information
dongjoon-hyun committed Jan 17, 2018
commit c67809c9dbe0a21011649dceededa84d73377d1c
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,19 @@ import org.apache.spark.sql.test.SharedSQLContext
class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
import testImplicits._

Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
private val allFileBasedDataSources = Seq("orc", "parquet", "csv", "json", "text")

allFileBasedDataSources.foreach { format =>
test(s"Writing empty datasets should not fail - $format") {
withTempPath { dir =>
Seq("str").toDS().limit(0).write.format(format).save(dir.getCanonicalPath)
}
}
}

Seq("orc", "parquet", "csv", "json").foreach { format =>
test(s"SPARK-23072 Write and read back unicode schema - $format") {
// `TEXT` data source always has a single column whose name is `value`.
allFileBasedDataSources.filterNot(_ == "text").foreach { format =>
test(s"SPARK-23072 Write and read back unicode column names - $format") {
withTempPath { path =>
val dir = path.getCanonicalPath

Expand All @@ -42,12 +45,14 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
df.write.format(format).option("header", "true").save(dir)
val answerDf = spark.read.format(format).option("header", "true").load(dir)

assert(df.schema === answerDf.schema)
assert(df.schema.sameType(answerDf.schema))
checkAnswer(df, answerDf)
}
}
}

// Only ORC/Parquet support this. `CSV` and `JSON` returns an empty schema.
// `TEXT` data source always has a single column whose name is `value`.
Seq("orc", "parquet").foreach { format =>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only these two formats support it? If so, please add the comments.

This is the same comment to the other test cases. Otherwise, add all of them for each test case.

You can define a global Seq to include all the built-in file formats we support.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

  1. Only two support this. I added comments.
  2. For the other test cases, I did.
  3. I added a global Seq, allFileBasedDataSources.

test(s"SPARK-15474 Write and read back non-empty schema with empty dataframe - $format") {
withTempPath { file =>
Expand All @@ -62,7 +67,7 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
}
}

Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
allFileBasedDataSources.foreach { format =>
test(s"SPARK-22146 read files containing special characters using $format") {
val nameWithSpecialChars = s"sp&cial%chars"
withTempDir { dir =>
Expand Down