@@ -52,11 +52,19 @@ private[spark] trait ExecutorAllocationClient {
* Request that the cluster manager kill the specified executors.
* @return whether the request is acknowledged by the cluster manager.
*/
def killExecutors(executorIds: Seq[String]): Boolean
def killExecutors(executorIds: Seq[String], force: Boolean): Boolean

/**
* Request that the cluster manager kill the specified executor.
* @return whether the request is acknowledged by the cluster manager.
*/
def killExecutor(executorId: String): Boolean = killExecutors(Seq(executorId))
private[spark] def killExecutor(executorId: String): Boolean =
killExecutors(Seq(executorId), force = false)

/**
* Request that the cluster manager kill the specified executor.
* @return whether the request is acknowledged by the cluster manager.
*/
def killExecutor(executorId: String, force: Boolean): Boolean =
killExecutors(Seq(executorId), force)
}
@@ -403,6 +403,8 @@ private[spark] class ExecutorAllocationManager(
// Send a request to the backend to kill this executor
val removeRequestAcknowledged = testing || client.killExecutor(executorId)
if (removeRequestAcknowledged) {
// Even if removeRequestAcknowledged is true, the executor may still not be killed;
// it can be rescued when an onTaskStart event arrives.
logInfo(s"Removing executor $executorId because it has been idle for " +
s"$executorIdleTimeoutS seconds (new desired total will be ${numExistingExecutors - 1})")
executorsPendingToRemove.add(executorId)
@@ -509,6 +511,13 @@ private[spark] class ExecutorAllocationManager(
private def onExecutorBusy(executorId: String): Unit = synchronized {
logDebug(s"Clearing idle timer for $executorId because it is now running a task")
removeTimes.remove(executorId)

// The executor may have been marked for removal by mistake because the async
// listener reported it as idle; see SPARK-9552.
if (executorsPendingToRemove.contains(executorId)) {
Contributor

Instead of fixing the list of pending executors here, I wonder if it wouldn't be better to change removeExecutor to return false when the executors asked to be killed were busy? Then they wouldn't even be added to this list to start with.

That slightly changes the semantics of the return value, but it also sounds more correct. With your changes, killExecutors(..., force = false) will return true even if it didn't kill any executors, which sounds wrong.
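
For illustration only, a minimal self-contained sketch of the semantics being suggested here, assuming the backend can tell which executors it will actually ask to be killed; the helper names and hard-coded IDs are made up for the example and are not the real CoarseGrainedSchedulerBackend code:

```scala
import scala.collection.mutable

object KillSemanticsSketch {
  val registeredExecutors = mutable.Set("1", "2", "3")           // stand-in for executorDataMap keys
  val executorsPendingToRemove = mutable.Set.empty[String]
  def isExecutorBusy(id: String): Boolean = id == "2"            // pretend executor "2" is busy
  def doKillExecutors(ids: Seq[String]): Boolean = ids.nonEmpty  // pretend the RPC succeeds

  def killExecutors(executorIds: Seq[String], force: Boolean): Boolean = {
    val knownExecutors = executorIds.filter(registeredExecutors.contains)
    // Skip executors already pending removal, and busy ones unless force is set.
    val executorsToKill = knownExecutors
      .filter { id => !executorsPendingToRemove.contains(id) }
      .filter { id => force || !isExecutorBusy(id) }
    executorsPendingToRemove ++= executorsToKill
    // Key change: acknowledge only when something will actually be killed.
    executorsToKill.nonEmpty && doKillExecutors(executorsToKill)
  }
}
```

With this shape, KillSemanticsSketch.killExecutors(Seq("2"), force = false) returns false rather than true, which is the semantics change being discussed; the single-executor killExecutor would then inherit the more meaningful result.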

Contributor Author

@vanzin, in the original design I did change the return value of that function (killExecutors), but I moved away from that. It was not only because of the last round of review comments, but also because the semantics are still a little strange. For example, say you have 3 executors to kill with force = false and one of them turns out to be busy: it is hard to tell directly whether the kill succeeded or not (a tiny example follows below). If we only support a single executor here, it is much simpler and more straightforward.
Besides, this was changed according to the last round of review comments: per the documentation, killExecutors only returns the acknowledgement, which does not indicate the outcome of the kill action. Please let me know your further thoughts.
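
To make the ambiguity concrete, a tiny hypothetical example (the IDs and which executor is busy are made up):

```scala
// Ask to kill three executors without forcing; pretend "2" is busy.
val requested = Seq("1", "2", "3")
val busy      = Set("2")
val eligible  = requested.filterNot(busy.contains)  // Seq("1", "3")

// Only "1" and "3" can be killed. Should the single Boolean result be true
// (some executors were killed) or false (not all requested executors were killed)?
// Either answer hides part of what happened, which is the ambiguity described above.
```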

Contributor

While I agree with what you say, the current return value is both not very useful and really not in line with what the documentation says. It basically means "a message was sent to the cluster manager asking the executors to be killed". It doesn't mean the cluster manager received the message nor whether it successfully acted on it.

So IMO it should be fine to change the meaning of the return value of killExecutor slightly; it would make the return value slightly more useful.

Also, that makes me question whether your current code really works. If the executor ID is in the executorsPendingToRemove list, it means a request to kill that executor has already been sent to the cluster manager. Meaning that even if you remove the executor from this list, the cluster manager will still kill it. Which makes my suggestion of not sending the kill request even more important.

I see what the race is, but once the request is sent to the cluster manager, it's too late to try to fix things. So the only enhancement I see is if you're able to avoid sending the request in the first place.

Contributor Author

@vanzin Here is the code path:

  1. Prepare the full list of executor IDs to be killed (those that meet certain criteria).
  2. killExecutors filters out the non-eligible ones (so some of them may not actually be killed).
  3. No matter which executors were filtered out, if some of them are acknowledged (really killed), we add the entire executor ID list to executorsPendingToRemove. There is no way to tell which ones will actually be killed.

That is why we need this kind of rescue; a condensed sketch of the flow follows below. Please let me know if it makes sense.
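
As mentioned, a condensed, self-contained sketch of the rescue flow as it appears in this diff; the method names mirror ExecutorAllocationManager, but the bodies are simplified stand-ins, not the real code:

```scala
import scala.collection.mutable

object RescueFlowSketch {
  val executorsPendingToRemove = mutable.Set.empty[String]

  // Stand-in for the client call: the request is only acknowledged,
  // it does not mean the executor has actually been killed yet.
  def killExecutor(executorId: String): Boolean = true

  // Idle-timeout path: the executor looks idle, so we ask the cluster manager
  // to kill it and optimistically record it as pending removal.
  def removeExecutor(executorId: String): Unit = {
    if (killExecutor(executorId)) {
      executorsPendingToRemove += executorId
    }
  }

  // onTaskStart path: a task just landed on this executor, so the async
  // listener's "idle" judgment was stale and the executor is rescued.
  def onExecutorBusy(executorId: String): Unit = {
    if (executorsPendingToRemove.contains(executorId)) {
      executorsPendingToRemove -= executorId
    }
  }
}
```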

Contributor

> That is why we need this kind of rescue

But what are you rescuing? You're removing the executor from the "pending to remove" list; but the request to kill the executor has already been sent, otherwise it would not be there. So the executor will still be killed, even if you remove it from this list.

Contributor Author

@vanzin I am a little bit confused. If at least one executor is killed and the call returns true, then all of those executors are added to executorsPendingToRemove; see the killExecutors code.

Contributor

Look at that code again. That code is calling killExecutor, NOT killExecutors. There is a single executor involved!

Contributor Author

OK. I see. You mean change the killExecutor return value only, right?

Contributor

Yes, I mean changing the return value of killExecutor. But since killExecutor is implemented as a call to killExecutors, plural, with a list containing a single executor, you have to change the return value of killExecutors.

Contributor Author

OK, I will change that accordingly. It will also change the original semantics.

// Rescue the executor from the pending-to-remove list
executorsPendingToRemove.remove(executorId)
}
}

/**
11 changes: 6 additions & 5 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1448,10 +1448,10 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
* @return whether the request is received.
*/
@DeveloperApi
override def killExecutors(executorIds: Seq[String]): Boolean = {
override def killExecutors(executorIds: Seq[String], force: Boolean = true): Boolean = {
schedulerBackend match {
case b: CoarseGrainedSchedulerBackend =>
b.killExecutors(executorIds)
b.killExecutors(executorIds, force)
case _ =>
logWarning("Killing executors is only supported in coarse-grained mode")
false
@@ -1470,7 +1470,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
* @return whether the request is received.
*/
@DeveloperApi
override def killExecutor(executorId: String): Boolean = super.killExecutor(executorId)
override def killExecutor(executorId: String, force: Boolean = true): Boolean =
super.killExecutor(executorId, force)

/**
* Request that the cluster manager kill the specified executor without adjusting the
@@ -1486,10 +1487,10 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
*
* @return whether the request is received.
*/
private[spark] def killAndReplaceExecutor(executorId: String): Boolean = {
private[spark] def killAndReplaceExecutor(executorId: String, force: Boolean = true): Boolean = {
schedulerBackend match {
case b: CoarseGrainedSchedulerBackend =>
b.killExecutors(Seq(executorId), replace = true)
b.killExecutors(Seq(executorId), replace = true, force)
case _ =>
logWarning("Killing executors is only supported in coarse-grained mode")
false
@@ -88,7 +88,8 @@ private[spark] class TaskSchedulerImpl(
val nextTaskId = new AtomicLong(0)

// Which executor IDs we have executors on
val activeExecutorIds = new HashSet[String]
// Each executor will record its running or launched task number
Contributor

nit: "task count" instead of "task number".

val activeExecutorIdsWithLoads = new HashMap[String, Int]
Contributor

nit: instead of WithLoads, WithTasks or WithTaskCount?


// The set of executors we have on each host; this is used to compute hostsAlive, which
// in turn is used to decide when we can attain data locality on a given host
@@ -254,6 +255,7 @@ private[spark] class TaskSchedulerImpl(
val tid = task.taskId
taskIdToTaskSetManager(tid) = taskSet
taskIdToExecutorId(tid) = execId
activeExecutorIdsWithLoads(execId) += 1
executorsByHost(host) += execId
availableCpus(i) -= CPUS_PER_TASK
assert(availableCpus(i) >= 0)
@@ -282,7 +284,7 @@ private[spark] class TaskSchedulerImpl(
var newExecAvail = false
for (o <- offers) {
executorIdToHost(o.executorId) = o.host
activeExecutorIds += o.executorId
activeExecutorIdsWithLoads.getOrElseUpdate(o.executorId, 0)
if (!executorsByHost.contains(o.host)) {
executorsByHost(o.host) = new HashSet[String]()
executorAdded(o.executorId, o.host)
@@ -331,7 +333,8 @@ private[spark] class TaskSchedulerImpl(
if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) {
// We lost this entire executor, so remember that it's gone
val execId = taskIdToExecutorId(tid)
if (activeExecutorIds.contains(execId)) {

if (activeExecutorIdsWithLoads.contains(execId)) {
removeExecutor(execId,
SlaveLost(s"Task $tid was lost, so marking the executor as lost as well."))
failedExecutor = Some(execId)
@@ -341,7 +344,10 @@
case Some(taskSet) =>
if (TaskState.isFinished(state)) {
taskIdToTaskSetManager.remove(tid)
taskIdToExecutorId.remove(tid)
taskIdToExecutorId.remove(tid) match {
  case Some(execId) => activeExecutorIdsWithLoads(execId) -= 1
  case None =>
}
Contributor

taskIdToExecutorId.remove(tid).foreach { execId =>
  if (executorIdToTaskCount.contains(execId)) {
    executorIdToTaskCount(execId) -= 1
  }
}

Contributor Author

I will change that.

}
if (state == TaskState.FINISHED) {
taskSet.removeRunningTask(tid)
@@ -462,7 +468,7 @@ private[spark] class TaskSchedulerImpl(
var failedExecutor: Option[String] = None

synchronized {
if (activeExecutorIds.contains(executorId)) {
if (activeExecutorIdsWithLoads.contains(executorId)) {
val hostPort = executorIdToHost(executorId)
logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason))
removeExecutor(executorId, reason)
@@ -484,7 +490,8 @@

/** Remove an executor from all our data structures and mark it as lost */
private def removeExecutor(executorId: String, reason: ExecutorLossReason) {
activeExecutorIds -= executorId
activeExecutorIdsWithLoads -= executorId

val host = executorIdToHost(executorId)
val execs = executorsByHost.getOrElse(host, new HashSet)
execs -= executorId
@@ -518,7 +525,11 @@
}

def isExecutorAlive(execId: String): Boolean = synchronized {
activeExecutorIds.contains(execId)
activeExecutorIdsWithLoads.contains(execId)
}

def isExecutorBusy(execId: String): Boolean = synchronized {
activeExecutorIdsWithLoads.getOrElse(execId, -1) > 0
}

// By default, rack is unknown
@@ -410,8 +410,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
* Request that the cluster manager kill the specified executors.
* @return whether the kill request is acknowledged.
*/
final override def killExecutors(executorIds: Seq[String]): Boolean = synchronized {
killExecutors(executorIds, replace = false)
final override def killExecutors(
executorIds: Seq[String],
force: Boolean): Boolean = synchronized {
killExecutors(executorIds, replace = false, force)
}

/**
@@ -421,15 +423,29 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
* @param replace whether to replace the killed executors with new ones
* @return whether the kill request is acknowledged.
*/
final def killExecutors(executorIds: Seq[String], replace: Boolean): Boolean = synchronized {
final def killExecutors(
executorIds: Seq[String],
replace: Boolean,
force: Boolean): Boolean = synchronized {
logInfo(s"Requesting to kill executor(s) ${executorIds.mkString(", ")}")
val (knownExecutors, unknownExecutors) = executorIds.partition(executorDataMap.contains)
unknownExecutors.foreach { id =>
logWarning(s"Executor to kill $id does not exist!")
}

// If force is true, kill all executors, busy or idle;
// otherwise only idle executors are eligible to be killed.
val idleExecutors =
  if (force) {
    knownExecutors
  } else {
    knownExecutors.filter { id =>
      logWarning(s"Busy executor $id is not valid to be killed!")
      !scheduler.isExecutorBusy(id)
    }
  }
Contributor

I would do

// If an executor is already pending to be removed, do not kill it again (SPARK-9795)
// If this executor is busy, do not kill it unless we are told to force kill it (SPARK-9552)
val executorsToKill = knownExecutors
  .filter { id => !executorsPendingToRemove.contains(id) }
  .filter { id => force || !scheduler.isExecutorBusy(id) }

no need to log a warning

Contributor Author

Nice suggestion. Will change that.


// If an executor is already pending to be removed, do not kill it again (SPARK-9795)
val executorsToKill = knownExecutors.filter { id => !executorsPendingToRemove.contains(id) }
val executorsToKill = idleExecutors.filter { id => !executorsPendingToRemove.contains(id) }
executorsPendingToRemove ++= executorsToKill

// If we do not wish to replace the executors we kill, sync the target number of executors
@@ -442,6 +458,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
numPendingExecutors += knownExecutors.size
}

// executorsToKill may be empty
Contributor

what does this comment mean? Is it a TODO? Do we need to check whether it's empty before passing it to doKillExecutors? We can probably just remove this comment and let the downstream code handle this.

Contributor Author

Sure, will do.

doKillExecutors(executorsToKill)
}
