@@ -50,7 +50,7 @@ class DefaultSource extends HadoopFsRelationProvider with DataSourceRegister {
       partitionColumns: Option[StructType],
       parameters: Map[String, String]): HadoopFsRelation = {
     dataSchema.foreach(verifySchema)
-    new TextRelation(None, partitionColumns, paths)(sqlContext)
+    new TextRelation(None, dataSchema, partitionColumns, paths)(sqlContext)
   }

   override def shortName(): String = "text"
@@ -70,15 +70,16 @@ class DefaultSource extends HadoopFsRelationProvider with DataSourceRegister {

 private[sql] class TextRelation(
     val maybePartitionSpec: Option[PartitionSpec],
+    val textSchema: Option[StructType],
     override val userDefinedPartitionColumns: Option[StructType],
     override val paths: Array[String] = Array.empty[String],
     parameters: Map[String, String] = Map.empty[String, String])
     (@transient val sqlContext: SQLContext)
   extends HadoopFsRelation(maybePartitionSpec, parameters) {

-  /** Data schema is always a single column, named "value". */
-  override def dataSchema: StructType = new StructType().add("value", StringType)
+  /** Data schema is a single column named "value" if the original data source has no schema. */
+  override def dataSchema: StructType =
+    textSchema.getOrElse(new StructType().add("value", StringType))
cloud-fan (Contributor):
should we make sure that textSchema is a struct type that has only one string field?

Author:
@cloud-fan DefaultSource.scala is the only place that creates a TextRelation, and it verifies that the schema is size 1 and of type string before creating a TextRelation. So I think it is fine not to verify again here. What do you think?

cloud-fan (Contributor):
oh, then it's fine
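
For context, the check the author refers to is applied via dataSchema.foreach(verifySchema) in the first hunk above. A minimal sketch of what such a check looks like is below; the exact method body and error messages in DefaultSource.scala are assumed, not quoted from the PR:

```scala
import org.apache.spark.sql.types.{StringType, StructType}

// Sketch only: approximates the private verifySchema helper in DefaultSource.scala.
// It rejects any user-provided schema that is not exactly one StringType column,
// which is why TextRelation does not need to re-validate textSchema.
def verifySchema(schema: StructType): Unit = {
  require(schema.size == 1 && schema.head.dataType == StringType,
    s"Text data source supports a single string column, but got: ${schema.simpleString}")
}
```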

/** This is an internal data source that outputs internal row format. */
override val needConversion: Boolean = false

@@ -58,6 +58,17 @@ class TextSuite extends QueryTest with SharedSQLContext {
}
}

test("SPARK-12562 verify write.text() can handle column name beyond `value`") {
rxin (Contributor):
why don't you just change the existing test case to rename the dataframe column and leave the following as a comment there?

SPARK-12562 verify write.text() can handle column name beyond value

Author:
@rxin I thought about it, but was not sure if it was a good idea to change the existing test case. In the existing test, should I add a second DataFrame with the column renamed, or just replace the original DataFrame with the renamed column?

rxin (Contributor):
Just replace the original one to something weird, like "adwrasdf"

+    val df = sqlContext.read.text(testFile).withColumnRenamed("value", "col1")
+
+    val tempFile = Utils.createTempDir()
+    tempFile.delete()
+    df.write.text(tempFile.getCanonicalPath)
+    verifyFrame(sqlContext.read.text(tempFile.getCanonicalPath))
+
+    Utils.deleteRecursively(tempFile)
+  }
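
For reference, a sketch of the alternative @rxin suggests: folding the rename into the existing round-trip test instead of adding a separate case. The test name, the "adwrasdf" column name, and the testFile/verifyFrame helpers are taken from the discussion above, not from the PR as merged:

```scala
// Sketch only: a merged version of the existing round-trip test, per the review
// suggestion. SPARK-12562: write.text() must handle a column name other than "value".
test("reading and writing text files round-trips with a renamed column") {
  val df = sqlContext.read.text(testFile).withColumnRenamed("value", "adwrasdf")

  val tempFile = Utils.createTempDir()
  tempFile.delete()
  df.write.text(tempFile.getCanonicalPath)
  verifyFrame(sqlContext.read.text(tempFile.getCanonicalPath))

  Utils.deleteRecursively(tempFile)
}
```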

private def testFile: String = {
Thread.currentThread().getContextClassLoader.getResource("text-suite.txt").toString
}