[SPARK-6686][SQL] Use resolved output instead of names for toDF rename

marmbrus · marmbrus · commit 052dee070783 · 2015-04-02T18:30:55.000-07:00
This is a workaround for a problem reported on the user list. Instead of looking up each existing column again by name, toDF now renames the already-resolved output attributes of the logical plan. It doesn't fix the core problem, but it is, in general, a more robust way to do renames. Author: Michael Armbrust <michael@databricks.com> Closes #5337 from marmbrus/toDFrename and squashes the following commits: 6a3159d [Michael Armbrust] [SPARK-6686][SQL] Use resolved output instead of names for toDF rename
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -240,8 +240,8 @@ class DataFrame private[sql](
         s"Old column names (${schema.size}): " + schema.fields.map(_.name).mkString(", ") + "\n" +
         s"New column names (${colNames.size}): " + colNames.mkString(", "))
 
-    val newCols = schema.fieldNames.zip(colNames).map { case (oldName, newName) =>
-      apply(oldName).as(newName)
+    val newCols = logicalPlan.output.zip(colNames).map { case (oldAttribute, newName) =>
+      Column(oldAttribute).as(newName)
     }
     select(newCols :_*)
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -60,6 +60,14 @@ class DataFrameSuite extends QueryTest {
     assert($"test".toString === "test")
   }
 
+  test("rename nested groupby") {
+    val df = Seq((1,(1,1))).toDF()
+
+    checkAnswer(
+      df.groupBy("_1").agg(col("_1"), sum("_2._1")).toDF("key", "total"),
+      Row(1, 1) :: Nil)
+  }
+
   test("invalid plan toString, debug mode") {
     val oldSetting = TestSQLContext.conf.dataFrameEagerAnalysis
     TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "true")

Original file line number	Diff line number	Diff line change
`@@ -240,8 +240,8 @@ class DataFrame private[sql](`
`240`	`240`	`s"Old column names (${schema.size}): " + schema.fields.map(_.name).mkString(", ") + "\n" +`
`241`	`241`	`s"New column names (${colNames.size}): " + colNames.mkString(", "))`
`242`	`242`
`243`		`- val newCols = schema.fieldNames.zip(colNames).map { case (oldName, newName) =>`
`244`		`- apply(oldName).as(newName)`
	`243`	`+ val newCols = logicalPlan.output.zip(colNames).map { case (oldAttribute, newName) =>`
	`244`	`+ Column(oldAttribute).as(newName)`
`245`	`245`	`}`
`246`	`246`	`select(newCols :_*)`
`247`	`247`	`}`