@@ -165,10 +165,10 @@ private void freeCurrentPage() {
   protected FixedLengthRowBasedKeyValueBatch(StructType keySchema, StructType valueSchema,
       int maxRows, TaskMemoryManager manager) {
     super(keySchema, valueSchema, maxRows, manager);
-    klen = keySchema.defaultSize()
-        + UnsafeRow.calculateBitSetWidthInBytes(keySchema.length());
-    vlen = valueSchema.defaultSize()
-        + UnsafeRow.calculateBitSetWidthInBytes(valueSchema.length());
+    int keySize = keySchema.size() * 8; // each fixed-length field is stored in an 8-byte word
+    int valueSize = valueSchema.size() * 8;
+    klen = keySize + UnsafeRow.calculateBitSetWidthInBytes(keySchema.length());
+    vlen = valueSize + UnsafeRow.calculateBitSetWidthInBytes(valueSchema.length());
     recordLength = klen + vlen + 8;
   }
 }
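
For intuition about the sizes computed above, here is a minimal Scala sketch (the FixedLengthRecordLayout object and its helpers are hypothetical; bitSetWidthInBytes mirrors what UnsafeRow.calculateBitSetWidthInBytes does, one null bit per field rounded up to whole 8-byte words, and the trailing 8 bytes are assumed to be the per-record key/value length prefix):

    object FixedLengthRecordLayout {
      // Mirrors UnsafeRow.calculateBitSetWidthInBytes: one null-tracking bit per
      // field, rounded up to whole 8-byte words.
      def bitSetWidthInBytes(numFields: Int): Int = ((numFields + 63) / 64) * 8

      // Each fixed-length field occupies one 8-byte word, regardless of its type.
      def rowLength(numFields: Int): Int = numFields * 8 + bitSetWidthInBytes(numFields)

      def main(args: Array[String]): Unit = {
        val klen = rowLength(2) // e.g. a two-field key:   2 * 8 + 8 = 24 bytes
        val vlen = rowLength(1) // e.g. a one-field value: 1 * 8 + 8 = 16 bytes
        // + 8 assumed for the two 4-byte key/value length fields of each record.
        println(s"klen=$klen, vlen=$vlen, recordLength=${klen + vlen + 8}") // 24, 16, 48
      }
    }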

[One file's diff is omitted here: large diffs are not rendered by default.]

@@ -141,8 +141,16 @@ class RowBasedHashMapGenerator(
   }
 
     val createUnsafeRowForKey = groupingKeys.zipWithIndex.map { case (key: Buffer, ordinal: Int) =>
-      s"agg_rowWriter.write(${ordinal}, ${key.name})"}
-      .mkString(";\n")
+      key.dataType match {

[Member comment] Can we add a regression test for this in [sql]?

+        case t: DecimalType =>
+          s"agg_rowWriter.write(${ordinal}, ${key.name}, ${t.precision}, ${t.scale})"
+        case t: DataType =>
+          if (!t.isInstanceOf[StringType] && !ctx.isPrimitiveType(t)) {
+            throw new IllegalArgumentException(s"cannot generate code for unsupported type: $t")
+          }
+          s"agg_rowWriter.write(${ordinal}, ${key.name})"
+      }
+    }.mkString(";\n")
 
     s"""
       |public org.apache.spark.sql.catalyst.expressions.UnsafeRow findOrInsert(${
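To see what the new match produces, here is an illustrative standalone Scala sketch (codegenKeyWrites and the agg_key_* names are hypothetical, and the Set of primitive types is a simplified stand-in for ctx.isPrimitiveType): each grouping key becomes one agg_rowWriter.write(...) call, with decimal keys additionally passing their precision and scale.

    import org.apache.spark.sql.types._

    object KeyWriterCodegenSketch {
      // Simplified stand-in for ctx.isPrimitiveType(t).
      private val primitives: Set[DataType] =
        Set(BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType)

      def codegenKeyWrites(keys: Seq[(String, DataType)]): String =
        keys.zipWithIndex.map { case ((name, dataType), ordinal) =>
          dataType match {
            case t: DecimalType =>
              // Decimals carry precision and scale through to the row writer.
              s"agg_rowWriter.write($ordinal, $name, ${t.precision}, ${t.scale})"
            case t: DataType =>
              if (!t.isInstanceOf[StringType] && !primitives.contains(t)) {
                throw new IllegalArgumentException(s"cannot generate code for unsupported type: $t")
              }
              s"agg_rowWriter.write($ordinal, $name)"
          }
        }.mkString(";\n")

      def main(args: Array[String]): Unit = {
        println(codegenKeyWrites(Seq("agg_key_a" -> DecimalType(10, 2), "agg_key_b" -> IntegerType)))
        // agg_rowWriter.write(0, agg_key_a, 10, 2);
        // agg_rowWriter.write(1, agg_key_b)
      }
    }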
@@ -499,14 +499,16 @@ object SQLConf {
       .intConf
       .createWithDefault(40)
 
-  val VECTORIZED_AGG_MAP_MAX_COLUMNS =
-    SQLConfigBuilder("spark.sql.codegen.aggregate.map.columns.max")
+  val FAST_AGG_MAP_IMPL =
+    SQLConfigBuilder("spark.sql.codegen.aggregate.map.enforce.impl")

[Member comment] Let's also make sure that all references to the old config are also appropriately modified.

       .internal()
-      .doc("Sets the maximum width of schema (aggregate keys + values) for which aggregate with" +
-        "keys uses an in-memory columnar map to speed up execution. Setting this to 0 effectively" +
-        "disables the columnar map")
-      .intConf
-      .createWithDefault(3)
+      .doc("Sets the implementation for fast hash map during aggregation. Could be one of the " +
+        "following: rowbased, vectorized, skip, auto. Defaults to auto, and should only be other " +
+        "values for testing purposes.")
+      .stringConf
+      .transform(_.toLowerCase())
+      .checkValues(Set("rowbased", "vectorized", "skip", "auto"))
+      .createWithDefault("auto")

[Member comment] Note that this removes the entire vectorized hashmap code from the test path even though we claim to support it. Let's make sure that we have explicit tests that test for both "rowbased" and "vectorized".

 
   val FILE_SINK_LOG_DELETION = SQLConfigBuilder("spark.sql.streaming.fileSink.log.deletion")
     .internal()
@@ -673,7 +675,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
 
   override def runSQLonFile: Boolean = getConf(RUN_SQL_ON_FILES)
 
-  def vectorizedAggregateMapMaxColumns: Int = getConf(VECTORIZED_AGG_MAP_MAX_COLUMNS)
+  def enforceFastAggHashMapImpl: String = getConf(FAST_AGG_MAP_IMPL)
 
   def variableSubstituteEnabled: Boolean = getConf(VARIABLE_SUBSTITUTE_ENABLED)
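
A usage sketch of the new config (assumes a Spark build with this patch; the local SparkSession wiring is illustrative). Because of .transform(_.toLowerCase()) the value is case-insensitive, and .checkValues should reject anything outside the four allowed strings at set time:

    import org.apache.spark.sql.SparkSession

    object FastAggMapImplConfSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[2]").appName("conf-sketch").getOrCreate()

        // Accepted: .transform(_.toLowerCase()) normalizes the value before
        // validation, so internal readers of FAST_AGG_MAP_IMPL see "rowbased".
        spark.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", "RowBased")

        // Rejected by .checkValues(Set("rowbased", "vectorized", "skip", "auto")).
        try {
          spark.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", "columnar")
        } catch {
          case e: IllegalArgumentException => println(s"rejected as expected: ${e.getMessage}")
        }

        spark.stop()
      }
    }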

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DecimalType

abstract class AggregateHashMapSuite extends DataFrameAggregateSuite {

[Member comment] As discussed offline, let's just move this into DataFrameAggregateSuite to prevent inadvertent overrides.

import testImplicits._

protected def setAggregateHashMapImpl(): Unit

protected override def beforeAll(): Unit = {
setAggregateHashMapImpl()
sparkConf.set("spark.sql.codegen.fallback", "false")
super.beforeAll()
}

test("SQL decimal test") {

[Member comment] Can we just add this in DataFrameAggregateSuite?

checkAnswer(
decimalData.groupBy('a cast DecimalType(10, 2)).agg(avg('b cast DecimalType(10, 2))),
Seq(Row(new java.math.BigDecimal(1.0), new java.math.BigDecimal(1.5)),
Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(1.5)),
Row(new java.math.BigDecimal(3.0), new java.math.BigDecimal(1.5))))
}
}
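
The same decimal aggregation can be reproduced outside the suite; a self-contained sketch (assumes a build containing this patch, and hand-builds a small dataset in place of the decimalData fixture used above):

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions._
    import org.apache.spark.sql.types.DecimalType

    object DecimalAggRepro {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[2]").appName("decimal-agg").getOrCreate()
        import spark.implicits._

        // Force the row-based fast hash map and disable codegen fallback, as the
        // suites above do.
        spark.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", "rowbased")
        spark.conf.set("spark.sql.codegen.fallback", "false")

        // Stand-in for the decimalData fixture: three keys, with b averaging 1.5 each.
        val df = Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)).toDF("a", "b")

        df.groupBy($"a" cast DecimalType(10, 2))
          .agg(avg($"b" cast DecimalType(10, 2)))
          .show() // expect avg = 1.5 for each of the keys 1, 2, 3

        spark.stop()
      }
    }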
@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql

class RowBasedAggregateHashMapSuite extends AggregateHashMapSuite {

  protected override def setAggregateHashMapImpl(): Unit = {
sparkConf.set("spark.sql.codegen.aggregate.map.enforce.impl", "rowbased")
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql

class VectorizedAggregateHashMapSuite extends AggregateHashMapSuite {

  protected override def setAggregateHashMapImpl(): Unit = {
sparkConf.set("spark.sql.codegen.aggregate.map.enforce.impl", "vectorized")
}
}