SPARK-1706: Allow multiple executors per worker in Standalone mode #731
Changes from 1 commit
core/src/main/scala/org/apache/spark/deploy/master/Master.scala

```diff
@@ -533,11 +533,14 @@ private[master] class Master(
   }
 
   /**
-   * The resource allocator spread out each app among all the workers until it has all its cores in
-   * spreadOut mode otherwise packs each app into as few workers as possible until it has assigned
-   * all its cores. User can define spark.deploy.maxCoresPerExecutor per application to
-   * limit the maximum number of cores to allocate to each executor on each worker; if the parameter
-   * is not defined, then only one executor will be launched on a worker.
+   * Schedule executors to be launched on the workers. There are two modes of launching executors.
+   * The first attempts to spread out an application's executors on as many workers as possible,
+   * while the second does the opposite (i.e. launch them on as few workers as possible). The former
+   * is usually better for data locality purposes and is the default. The number of cores assigned
+   * to each executor is configurable. When this is explicitly set, multiple executors from the same
+   * application may be launched on the same worker if the worker has enough cores and memory.
+   * Otherwise, each executor grabs all the cores available on the worker by default, in which case
+   * only one executor may be launched on each worker.
    */
   private def startExecutorsOnWorkers(): Unit = {
     // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
```

Contributor: We should split on this sentence ("The number of cores...") to form a new paragraph. Right now it's one huge chunk of text.
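As an aside for readers, the two placement strategies described in this doc comment can be illustrated with a minimal, self-contained sketch. The `Worker` case class and the `scheduleCores` helper below are hypothetical stand-ins for the Master's `WorkerInfo` bookkeeping, not Spark APIs:

```scala
import scala.collection.mutable

// Hypothetical stand-in for WorkerInfo; only the free-core count matters here.
case class Worker(id: String, coresFree: Int)

/**
 * Distribute up to `coresWanted` cores across `workers`.
 * spreadOut = true  -> one core at a time, round-robin (use as many workers as possible)
 * spreadOut = false -> drain each worker before moving on (use as few workers as possible)
 */
def scheduleCores(workers: Seq[Worker], coresWanted: Int, spreadOut: Boolean): Map[String, Int] = {
  val usable = workers.filter(_.coresFree > 0).sortBy(-_.coresFree)
  val assigned = mutable.Map.empty[String, Int].withDefaultValue(0)
  var toAssign = math.min(coresWanted, usable.map(_.coresFree).sum)
  if (spreadOut) {
    var pos = 0
    while (toAssign > 0) {
      val w = usable(pos)
      if (assigned(w.id) < w.coresFree) { assigned(w.id) += 1; toAssign -= 1 }
      pos = (pos + 1) % usable.length // round-robin over the usable workers
    }
  } else {
    for (w <- usable if toAssign > 0) {
      val grant = math.min(w.coresFree, toAssign)
      assigned(w.id) += grant
      toAssign -= grant
    }
  }
  assigned.toMap
}

// Three workers with 4 free cores each; the app wants 6 cores in total.
val ws = Seq(Worker("w1", 4), Worker("w2", 4), Worker("w3", 4))
println(scheduleCores(ws, 6, spreadOut = true))  // w1 -> 2, w2 -> 2, w3 -> 2
println(scheduleCores(ws, 6, spreadOut = false)) // w1 -> 4, w2 -> 2
```

The real Master additionally enforces per-executor core and memory bounds in these loops; the sketch only captures the shape of the two strategies.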
```diff
@@ -546,7 +549,9 @@ private[master] class Master(
       // Try to spread out each app among all the workers, until it has all its cores
       for (app <- waitingApps if app.coresLeft > 0) {
         val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
-          .filter(canUse(app, _)).sortBy(_.coresFree).reverse
+          .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
+            worker.coresFree > 0)
+          .sortBy(_.coresFree).reverse
         val numUsable = usableWorkers.length
         val assigned = new Array[Int](numUsable) // Number of cores to give on each node
         var toAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
```

Contributor: You technically don't need the cores check here, since we already check in L551.

Author: I replaced it with `worker.coresFree >= app.desc.coresPerExecutor.getOrElse(0)`, so that we do not need to run the following allocation algorithm for the case I mentioned above.

Author: should be

Contributor: Again, this predicate is actually not needed because we handle it correctly in the line I pointed out earlier. But not a big deal, we can just leave it.

Author: Hmm... if we remove this, then in the case above, where the user prefers 3 cores per executor and all workers have at most 2 cores, though we will not allocate anything to the worker, we still generate a
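To make the author's follow-up concrete, here is a self-contained sketch of the refined filter. `WorkerLite` and `AppDescLite` are hypothetical shapes standing in for `WorkerInfo` and `ApplicationDescription`, and the predicate is the author's proposal from the thread, not necessarily the exact merged code:

```scala
// Hypothetical minimal shapes mirroring WorkerInfo and ApplicationDescription.
case class WorkerLite(alive: Boolean, coresFree: Int, memoryFree: Int)
case class AppDescLite(memoryPerExecutorMB: Int, coresPerExecutor: Option[Int])

def usableWorkers(workers: Seq[WorkerLite], desc: AppDescLite): Seq[WorkerLite] =
  workers
    .filter(_.alive)
    .filter { w =>
      // Enough free memory for one executor, and enough free cores for one
      // full executor. With getOrElse(0) (the author's suggestion), an app
      // that sets no per-executor core count accepts any worker with enough
      // memory; the allocation loop then decides how many cores it grants.
      w.memoryFree >= desc.memoryPerExecutorMB &&
        w.coresFree >= desc.coresPerExecutor.getOrElse(0)
    }
    .sortBy(-_.coresFree) // most free cores first

// The author's scenario: with 3 cores per executor requested, workers that
// have only 2 free cores are filtered out before the allocation loop runs.
val ws = Seq(WorkerLite(true, 2, 4096), WorkerLite(true, 4, 4096))
println(usableWorkers(ws, AppDescLite(1024, Some(3)))) // keeps only the 4-core worker
```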
```diff
@@ -566,15 +571,16 @@ private[master] class Master(
     } else {
       // Pack each app into as few workers as possible until we've assigned all its cores
       for (worker <- workers if worker.coresFree > 0 && worker.state == WorkerState.ALIVE) {
-        for (app <- waitingApps if app.coresLeft > 0) {
-          allocateWorkerResourceToExecutors(app, app.coresLeft, worker)
+        for (app <- waitingApps if app.coresLeft > 0 &&
+            worker.memoryFree >= app.desc.memoryPerExecutorMB) {
+          allocateWorkerResourceToExecutors(app, app.coresLeft, worker)
         }
       }
     }
   }
 
   /**
-   * allocate resources in a certain worker to one or more executors
+   * Allocate a worker's resources to one or more executors.
    * @param app the info of the application which the executors belong to
    * @param coresToAllocate cores on this worker to be allocated to this application
    * @param worker the worker info
```

Contributor: No need to check this again here... we already check this in
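For context on where `coresPerExecutor` and `memoryPerExecutorMB` come from: they travel with the application description the app submits. A hedged sketch of how an application might request this layout follows; the `spark.executor.cores` key reflects how this feature ultimately surfaced in Spark's standalone mode, while the draft doc comment above still mentioned `spark.deploy.maxCoresPerExecutor`:

```scala
import org.apache.spark.SparkConf

// Ask for at most 6 cores in total, 2 cores and 1 GB per executor. With this
// patch, a single worker with 4+ free cores and 2+ GB free may now host two
// of this app's executors instead of at most one.
val conf = new SparkConf()
  .setMaster("spark://master:7077") // placeholder master URL
  .setAppName("multi-executor-demo")
  .set("spark.cores.max", "6")
  .set("spark.executor.cores", "2")
  .set("spark.executor.memory", "1g")
```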
```diff
@@ -583,20 +589,24 @@ private[master] class Master(
       app: ApplicationInfo,
       coresToAllocate: Int,
       worker: WorkerInfo): Unit = {
-    if (canUse(app, worker)) {
-      val memoryPerExecutor = app.desc.memoryPerExecutorMB
-      val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(coresToAllocate)
-      var coresLeft = coresToAllocate
-      while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutor) {
-        val exec = app.addExecutor(worker, coresPerExecutor)
-        coresLeft -= coresPerExecutor
-        launchExecutor(worker, exec)
-        app.state = ApplicationState.RUNNING
-      }
-    }
+    val memoryPerExecutor = app.desc.memoryPerExecutorMB
+    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(coresToAllocate)
+    var coresLeft = coresToAllocate
+    while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutor) {
+      val exec = app.addExecutor(worker, coresPerExecutor)
+      coresLeft -= coresPerExecutor
+      launchExecutor(worker, exec)
+      app.state = ApplicationState.RUNNING
+    }
   }
 
-  private def startDriversOnWorkers(): Unit = {
+  /**
+   * Schedule the currently available resources among waiting apps. This method will be called
+   * every time a new app joins or resource availability changes.
+   */
+  private def schedule(): Unit = {
+    if (state != RecoveryState.ALIVE) { return }
+    // start in-cluster drivers, they take strict precedence over applications
     val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
     for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
       for (driver <- waitingDrivers) {
```

Contributor: This comment doesn't make much sense. Can you just replace it with
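The net effect of removing the `canUse` check is that `allocateWorkerResourceToExecutors` now relies on its callers to pre-filter workers, while its loop re-checks memory on each iteration (each launch consumes memory) and budgets cores up front via `coresLeft`. Here is a self-contained simulation of that arithmetic; `FreeWorker` and `allocate` are hypothetical stand-ins, with launching modeled as decrementing the worker's free pools:

```scala
// Hypothetical stand-in for WorkerInfo: launching an executor just
// subtracts the executor's cores and memory from the worker's free pool.
class FreeWorker(var coresFree: Int, var memoryFree: Int) {
  def launch(cores: Int, memoryMB: Int): Unit = {
    coresFree -= cores
    memoryFree -= memoryMB
  }
}

/** Mirror of the patched allocation loop: returns how many executors launch. */
def allocate(worker: FreeWorker,
             coresToAllocate: Int,
             coresPerExecutorOpt: Option[Int],
             memoryPerExecutorMB: Int): Int = {
  // If no per-executor core count is set, one executor grabs all offered cores.
  val coresPerExecutor = coresPerExecutorOpt.getOrElse(coresToAllocate)
  var coresLeft = coresToAllocate
  var launched = 0
  while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutorMB) {
    worker.launch(coresPerExecutor, memoryPerExecutorMB)
    coresLeft -= coresPerExecutor
    launched += 1
  }
  launched
}

// Worker with 8 free cores and 4096 MB free; app wants 2 cores and 1024 MB
// per executor -> 4 executors fit by cores and 4 by memory: 4 are launched.
println(allocate(new FreeWorker(8, 4096), 8, Some(2), 1024)) // 4
// Same worker, per-executor cores unset -> one executor takes all 8 cores.
println(allocate(new FreeWorker(8, 4096), 8, None, 1024))    // 1
```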
```diff
@@ -606,21 +616,11 @@ private[master] class Master(
       }
     }
   }
-  }
-
-  /**
-   * Schedule the currently available resources among waiting apps. This method will be called
-   * every time a new app joins or resource availability changes.
-   */
-  private def schedule(): Unit = {
-    if (state != RecoveryState.ALIVE) { return }
-    // start in-cluster drivers, they take strict precedence over applications
-    startDriversOnWorkers()
-    // start executors
     startExecutorsOnWorkers()
   }
 
   def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
     logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
     worker.addExecutor(exec)
     worker.actor ! LaunchExecutor(masterUrl,
```

Contributor: Remove this comment, as it doesn't convey any information.
Contributor: Can you break "There are two modes of..." into a new paragraph?