Introduce MemoryManager interface; add to SparkEnv.

The configuration of HEAP vs UNSAFE is now done at the Spark core level. The translation of encoded 64-bit addresses into base object + offset pairs is now handled by MemoryManager, allowing these pointers to be safely passed between operators that exchange data pages.
apache · JoshRosen · Apr 17, 2015 · Apr 18, 2015 · Apr 18, 2015 · Apr 18, 2015
commit a8e4a3fe40574c3a609beeb4794b11bd720a31e7
diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -40,6 +40,7 @@ import org.apache.spark.scheduler.OutputCommitCoordinator.OutputCommitCoordinato
 import org.apache.spark.serializer.Serializer
 import org.apache.spark.shuffle.{ShuffleMemoryManager, ShuffleManager}
 import org.apache.spark.storage._
+import org.apache.spark.unsafe.memory.{MemoryManager => UnsafeMemoryManager, MemoryAllocator}
 import org.apache.spark.util.{RpcUtils, Utils}
 
 /**
@@ -69,6 +70,7 @@ class SparkEnv (
     val sparkFilesDir: String,
     val metricsSystem: MetricsSystem,
     val shuffleMemoryManager: ShuffleMemoryManager,
+    val unsafeMemoryManager: UnsafeMemoryManager,
     val outputCommitCoordinator: OutputCommitCoordinator,
     val conf: SparkConf) extends Logging {
 
@@ -382,6 +384,15 @@ object SparkEnv extends Logging {
       new OutputCommitCoordinatorEndpoint(rpcEnv, outputCommitCoordinator))
     outputCommitCoordinator.coordinatorRef = Some(outputCommitCoordinatorRef)
 
+    val unsafeMemoryManager: UnsafeMemoryManager = {
+      val allocator = if (conf.getBoolean("spark.unsafe.offHeap", false)) {
+        MemoryAllocator.UNSAFE
+      } else {
+        MemoryAllocator.HEAP
+      }
+      new UnsafeMemoryManager(allocator)
+    }
+
     val envInstance = new SparkEnv(
       executorId,
       rpcEnv,
@@ -398,6 +409,7 @@ object SparkEnv extends Logging {
       sparkFilesDir,
       metricsSystem,
       shuffleMemoryManager,
+      unsafeMemoryManager,
       outputCommitCoordinator,
       conf)
 

diff --git a/...c/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeFixedWidthAggregationMap.java b/...c/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeFixedWidthAggregationMap.java
@@ -25,8 +25,8 @@
 import org.apache.spark.sql.types.StructType;
 import org.apache.spark.unsafe.PlatformDependent;
 import org.apache.spark.unsafe.map.BytesToBytesMap;
-import org.apache.spark.unsafe.memory.MemoryAllocator;
 import org.apache.spark.unsafe.memory.MemoryLocation;
+import org.apache.spark.unsafe.memory.MemoryManager;
 
 /**
  * Unsafe-based HashMap for performing aggregations where the aggregated values are fixed-width.
@@ -102,23 +102,23 @@ public static boolean supportsAggregationBufferSchema(StructType schema) {
    * @param emptyAggregationBuffer the default value for new keys (a "zero" of the agg. function)
    * @param aggregationBufferSchema the schema of the aggregation buffer, used for row conversion.
    * @param groupingKeySchema the schema of the grouping key, used for row conversion.
-   * @param allocator the memory allocator used to allocate our Unsafe memory structures.
+   * @param memoryManager the memory manager used to allocate our Unsafe memory structures.
    * @param initialCapacity the initial capacity of the map (a sizing hint to avoid re-hashing).
    * @param enablePerfMetrics if true, performance metrics will be recorded (has minor perf impact)
    */
   public UnsafeFixedWidthAggregationMap(
       Row emptyAggregationBuffer,
       StructType aggregationBufferSchema,
       StructType groupingKeySchema,
-      MemoryAllocator allocator,
+      MemoryManager memoryManager,
       int initialCapacity,
       boolean enablePerfMetrics) {
     this.emptyAggregationBuffer =
       convertToUnsafeRow(emptyAggregationBuffer, aggregationBufferSchema);
     this.aggregationBufferSchema = aggregationBufferSchema;
     this.groupingKeyToUnsafeRowConverter = new UnsafeRowConverter(groupingKeySchema);
     this.groupingKeySchema = groupingKeySchema;
-    this.map = new BytesToBytesMap(allocator, initialCapacity, enablePerfMetrics);
+    this.map = new BytesToBytesMap(memoryManager, initialCapacity, enablePerfMetrics);
     this.enablePerfMetrics = enablePerfMetrics;
   }
 

diff --git a/...scala/org/apache/spark/sql/catalyst/expressions/UnsafeFixedWidthAggregationMapSuite.scala b/...scala/org/apache/spark/sql/catalyst/expressions/UnsafeFixedWidthAggregationMapSuite.scala
@@ -17,19 +17,32 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.unsafe.memory.MemoryAllocator
-import org.scalatest.{FunSuite, Matchers}
+import org.apache.spark.unsafe.memory.{MemoryManager, MemoryAllocator}
+import org.scalatest.{BeforeAndAfterEach, FunSuite, Matchers}
 
 import org.apache.spark.sql.types._
 
-class UnsafeFixedWidthAggregationMapSuite extends FunSuite with Matchers {
+class UnsafeFixedWidthAggregationMapSuite extends FunSuite with Matchers with BeforeAndAfterEach {
 
   import UnsafeFixedWidthAggregationMap._
 
   private val groupKeySchema = StructType(StructField("product", StringType) :: Nil)
   private val aggBufferSchema = StructType(StructField("salePrice", IntegerType) :: Nil)
   private def emptyAggregationBuffer: Row = new GenericRow(Array[Any](0))
 
+  private var memoryManager: MemoryManager = null
+
+  override def beforeEach(): Unit = {
+    memoryManager = new MemoryManager(true)
+  }
+
+  override def afterEach(): Unit = {
+    if (memoryManager != null) {
+      memoryManager.cleanUpAllPages()
+      memoryManager = null
+    }
+  }
+
   test("supported schemas") {
     assert(!supportsAggregationBufferSchema(StructType(StructField("x", StringType) :: Nil)))
     assert(supportsGroupKeySchema(StructType(StructField("x", StringType) :: Nil)))
@@ -45,7 +58,7 @@ class UnsafeFixedWidthAggregationMapSuite extends FunSuite with Matchers {
       emptyAggregationBuffer,
       aggBufferSchema,
       groupKeySchema,
-      MemoryAllocator.HEAP,
+      memoryManager,
       1024,
       false
     )
@@ -58,7 +71,7 @@ class UnsafeFixedWidthAggregationMapSuite extends FunSuite with Matchers {
       emptyAggregationBuffer,
       aggBufferSchema,
       groupKeySchema,
-      MemoryAllocator.HEAP,
+      memoryManager,
       1024,
       false
     )

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -31,7 +31,6 @@ private[spark] object SQLConf {
   val SHUFFLE_PARTITIONS = "spark.sql.shuffle.partitions"
   val CODEGEN_ENABLED = "spark.sql.codegen"
   val UNSAFE_ENABLED = "spark.sql.unsafe.enabled"
-  val UNSAFE_USE_OFF_HEAP = "spark.sql.unsafe.offHeap"
   val DIALECT = "spark.sql.dialect"
 
   val PARQUET_BINARY_AS_STRING = "spark.sql.parquet.binaryAsString"
@@ -159,13 +158,6 @@ private[sql] class SQLConf extends Serializable {
    */
   private[spark] def unsafeEnabled: Boolean = getConf(UNSAFE_ENABLED, "false").toBoolean
 
-  /**
-   * When set to true, Spark SQL will use off-heap memory allocation for managed memory operations.
-   *
-   * Defaults to false.
-   */
-  private[spark] def unsafeUseOffHeap: Boolean = getConf(UNSAFE_USE_OFF_HEAP, "false").toBoolean
-
   private[spark] def useSqlSerializer2: Boolean = getConf(USE_SQL_SERIALIZER2, "true").toBoolean
 
   /**

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -1013,8 +1013,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
 
     def unsafeEnabled: Boolean = self.conf.unsafeEnabled
 
-    def unsafeUseOffHeap: Boolean = self.conf.unsafeUseOffHeap
-
     def numPartitions: Int = self.conf.numShufflePartitions
 
     def strategies: Seq[Strategy] =

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.execution
 
+import org.apache.spark.SparkEnv
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.trees._
@@ -43,16 +44,14 @@ case class AggregateEvaluation(
  * @param aggregateExpressions expressions that are computed for each group.
  * @param child the input data source.
  * @param unsafeEnabled whether to allow Unsafe-based aggregation buffers to be used.
- * @param useOffHeap whether to use off-heap allocation (only takes effect if unsafeEnabled=true)
  */
 @DeveloperApi
 case class GeneratedAggregate(
     partial: Boolean,
     groupingExpressions: Seq[Expression],
     aggregateExpressions: Seq[NamedExpression],
     child: SparkPlan,
-    unsafeEnabled: Boolean,
-    useOffHeap: Boolean)
+    unsafeEnabled: Boolean)
   extends UnaryNode {
 
   override def requiredChildDistribution: Seq[Distribution] =
@@ -291,7 +290,7 @@ case class GeneratedAggregate(
           newAggregationBuffer(EmptyRow),
           aggregationBufferSchema,
           groupKeySchema,
-          if (useOffHeap) MemoryAllocator.UNSAFE else MemoryAllocator.HEAP,
+          SparkEnv.get.unsafeMemoryManager,
           1024 * 16,
           false
         )

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -141,10 +141,8 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
               groupingExpressions,
               partialComputation,
               planLater(child),
-              unsafeEnabled,
-              unsafeUseOffHeap),
-          unsafeEnabled,
-          unsafeUseOffHeap) :: Nil
+              unsafeEnabled),
+          unsafeEnabled) :: Nil
 
       // Cases where some aggregate can not be codegened
       case PartialAggregation(