fix review comments

apache · clockfly · Aug 19, 2016 · Aug 23, 2016 · Aug 23, 2016 · Aug 23, 2016
commit 7190eb0c2a4dce2c5b84c29fb90bb2def23a3520
diff --git a/...alyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/...alyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala
@@ -432,6 +432,12 @@ abstract class DeclarativeAggregate
  *  4. After processing all input aggregation objects of current group (group by key), the framework
  *     calls method `eval(buffer: T)` to generate the final output for this group.
  *  5. The framework moves on to next group, until all groups have been processed.
+ *
+ * NOTE: SQL with TypedImperativeAggregate functions is planned in sort based aggregation,
+ * instead of hash based aggregation, as TypedImperativeAggregate uses BinaryType as aggregation
+ * buffer's storage format, which is not supported by hash based aggregation. Hash based
+ * aggregation only supports aggregation buffers of mutable types (like LongType, IntegerType that
+ * have fixed length and can be mutated in place in UnsafeRow).
  */
 abstract class TypedImperativeAggregate[T] extends ImperativeAggregate {
 
@@ -507,8 +513,9 @@ abstract class TypedImperativeAggregate[T] extends ImperativeAggregate {
     }
   }
 
+  private[this] val anyObjectType = ObjectType(classOf[AnyRef])
   private def getField[U](input: InternalRow, fieldIndex: Int): U = {
-    input.get(fieldIndex, null).asInstanceOf[U]
+    input.get(fieldIndex, anyObjectType).asInstanceOf[U]
   }
 
   final override lazy val aggBufferAttributes: Seq[AttributeReference] = {

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala
@@ -54,16 +54,9 @@ object AggUtils {
       initialInputBufferOffset: Int = 0,
       resultExpressions: Seq[NamedExpression] = Nil,
       child: SparkPlan): SparkPlan = {
-
-    val hasTypedImperativeAggregate: Boolean = aggregateExpressions.exists {
-      case AggregateExpression(agg: TypedImperativeAggregate[_], _, _, _) => true
-      case _ => false
-    }
-
-    val aggBufferAttributesSupportedByHashAggregate = HashAggregateExec.supportsAggregate(
+    val useHash = HashAggregateExec.supportsAggregate(
       aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes))
-
-    if (aggBufferAttributesSupportedByHashAggregate && !hasTypedImperativeAggregate) {
+    if (useHash) {
       HashAggregateExec(
         requiredChildDistributionExpressions = requiredChildDistributionExpressions,
         groupingExpressions = groupingExpressions,

diff --git a/...rc/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala b/...rc/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala
@@ -55,14 +55,7 @@ class SortBasedAggregationIterator(
 
     val genericMutableBuffer = new GenericMutableRow(bufferRowSize)
 
-    val allFieldsMutable = bufferSchema.map(_.dataType).forall(UnsafeRow.isMutable)
-
-    val hasTypedImperativeAggregate = aggregateFunctions.exists {
-      case agg: TypedImperativeAggregate[_] => true
-      case _ => false
-    }
-
-    val useUnsafeBuffer = allFieldsMutable && !hasTypedImperativeAggregate
+    val useUnsafeBuffer = bufferSchema.map(_.dataType).forall(UnsafeRow.isMutable)
 
     val buffer = if (useUnsafeBuffer) {
       val unsafeProjection =