Revert 882c538 & 8426ddc, which introduced a regression

apache · liancheng · Apr 8, 2014 · Apr 8, 2014 · Apr 9, 2014 · Apr 9, 2014
commit 5bdbfe7170e0bfbb09a7b43aec04dc4a1ee866f2
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -121,7 +121,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   def cacheTable(tableName: String): Unit = {
     val currentTable = catalog.lookupRelation(None, tableName)
     val asInMemoryRelation =
-      InMemoryColumnarTableScan(executePlan(currentTable).executedPlan)
+      InMemoryColumnarTableScan(currentTable.output, executePlan(currentTable).executedPlan)
 
     catalog.registerTable(None, tableName, SparkLogicalPlan(asInMemoryRelation))
   }
@@ -131,7 +131,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
     EliminateAnalysisOperators(catalog.lookupRelation(None, tableName)) match {
       // This is kind of a hack to make sure that if this was just an RDD registered as a table,
       // we reregister the RDD as a table.
-      case SparkLogicalPlan(inMem @ InMemoryColumnarTableScan(e: ExistingRdd)) =>
+      case SparkLogicalPlan(inMem @ InMemoryColumnarTableScan(_, e: ExistingRdd)) =>
         inMem.cachedColumnBuffers.unpersist()
         catalog.unregisterTable(None, tableName)
         catalog.registerTable(None, tableName, SparkLogicalPlan(e))

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala
@@ -21,23 +21,23 @@ import org.apache.spark.sql.catalyst.expressions.{GenericMutableRow, Attribute}
 import org.apache.spark.sql.execution.{SparkPlan, LeafNode}
 import org.apache.spark.sql.Row
 
-private[sql] case class InMemoryColumnarTableScan(child: SparkPlan)
+private[sql] case class InMemoryColumnarTableScan(attributes: Seq[Attribute], child: SparkPlan)
   extends LeafNode {
 
-  override def output: Seq[Attribute] = child.output
+  override def output: Seq[Attribute] = attributes
 
   lazy val cachedColumnBuffers = {
-    val childOutput = child.output
+    val output = child.output
     val cached = child.execute().mapPartitions { iterator =>
-      val columnBuilders = childOutput.map { attribute =>
+      val columnBuilders = output.map { attribute =>
         ColumnBuilder(ColumnType(attribute.dataType).typeId, 0, attribute.name)
       }.toArray
 
       var row: Row = null
       while (iterator.hasNext) {
         row = iterator.next()
         var i = 0
-        while (i < childOutput.length) {
+        while (i < row.length) {
           columnBuilders(i).appendFrom(row, i)
           i += 1
         }

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -70,7 +70,8 @@ case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
     SparkLogicalPlan(
       alreadyPlanned match {
         case ExistingRdd(output, rdd) => ExistingRdd(output.map(_.newInstance), rdd)
-        case scan @ InMemoryColumnarTableScan(child) => scan
+        case scan @ InMemoryColumnarTableScan(output, child) =>
+          scan.copy(attributes = output.map(_.newInstance))
         case _ => sys.error("Multiple instance of the same relation detected.")
       }).asInstanceOf[this.type]
   }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
@@ -28,14 +28,14 @@ class InMemoryColumnarQuerySuite extends QueryTest {
 
   test("simple columnar query") {
     val plan = TestSQLContext.executePlan(testData.logicalPlan).executedPlan
-    val scan = SparkLogicalPlan(InMemoryColumnarTableScan(plan))
+    val scan = SparkLogicalPlan(InMemoryColumnarTableScan(plan.output, plan))
 
     checkAnswer(scan, testData.collect().toSeq)
   }
 
   test("projection") {
     val plan = TestSQLContext.executePlan(testData.select('value, 'key).logicalPlan).executedPlan
-    val scan = SparkLogicalPlan(InMemoryColumnarTableScan(plan))
+    val scan = SparkLogicalPlan(InMemoryColumnarTableScan(plan.output, plan))
 
     checkAnswer(scan, testData.collect().map {
       case Row(key: Int, value: String) => value -> key
@@ -44,7 +44,7 @@ class InMemoryColumnarQuerySuite extends QueryTest {
 
   test("SPARK-1436 regression: in-memory columns must be able to be accessed multiple times") {
     val plan = TestSQLContext.executePlan(testData.logicalPlan).executedPlan
-    val scan = SparkLogicalPlan(InMemoryColumnarTableScan(plan))
+    val scan = SparkLogicalPlan(InMemoryColumnarTableScan(plan.output, plan))
 
     checkAnswer(scan, testData.collect().toSeq)
     checkAnswer(scan, testData.collect().toSeq)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -120,7 +120,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
         castChildOutput(p, table, child)
 
       case p @ logical.InsertIntoTable(SparkLogicalPlan(InMemoryColumnarTableScan(
-        HiveTableScan(_, table, _))), _, child, _) =>
+        _, HiveTableScan(_, table, _))), _, child, _) =>
         castChildOutput(p, table, child)
     }
 

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -44,7 +44,7 @@ trait HiveStrategies {
       case logical.InsertIntoTable(table: MetastoreRelation, partition, child, overwrite) =>
         InsertIntoHiveTable(table, partition, planLater(child), overwrite)(hiveContext) :: Nil
       case logical.InsertIntoTable(SparkLogicalPlan(InMemoryColumnarTableScan(
-        HiveTableScan(_, table, _))), partition, child, overwrite) =>
+        _, HiveTableScan(_, table, _))), partition, child, overwrite) =>
         InsertIntoHiveTable(table, partition, planLater(child), overwrite)(hiveContext) :: Nil
       case _ => Nil
     }