[SPARK-9552] Add force control for killExecutors to avoid false killing for those busy executors #7888
TaskSchedulerImpl.scala

@@ -88,7 +88,8 @@ private[spark] class TaskSchedulerImpl(
   val nextTaskId = new AtomicLong(0)

   // Which executor IDs we have executors on
-  val activeExecutorIds = new HashSet[String]
+  // each executor will record running or launched task number
+  val activeExecutorIdsWithLoads = new HashMap[String, Int]

   // The set of executors we have on each host; this is used to compute hostsAlive, which
   // in turn is used to decide when we can attain data locality on a given host

Contributor: nit: "task count" instead of "task number".

Contributor: nit: instead of …
@@ -254,6 +255,7 @@ private[spark] class TaskSchedulerImpl(
         val tid = task.taskId
         taskIdToTaskSetManager(tid) = taskSet
         taskIdToExecutorId(tid) = execId
+        activeExecutorIdsWithLoads(execId) += 1
         executorsByHost(host) += execId
         availableCpus(i) -= CPUS_PER_TASK
         assert(availableCpus(i) >= 0)
@@ -282,7 +284,7 @@ private[spark] class TaskSchedulerImpl(
     var newExecAvail = false
     for (o <- offers) {
       executorIdToHost(o.executorId) = o.host
-      activeExecutorIds += o.executorId
+      activeExecutorIdsWithLoads.getOrElseUpdate(o.executorId, 0)
       if (!executorsByHost.contains(o.host)) {
         executorsByHost(o.host) = new HashSet[String]()
         executorAdded(o.executorId, o.host)
@@ -331,7 +333,8 @@ private[spark] class TaskSchedulerImpl(
         if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) {
           // We lost this entire executor, so remember that it's gone
           val execId = taskIdToExecutorId(tid)
-          if (activeExecutorIds.contains(execId)) {
+          if (activeExecutorIdsWithLoads.contains(execId)) {
             removeExecutor(execId,
               SlaveLost(s"Task $tid was lost, so marking the executor as lost as well."))
             failedExecutor = Some(execId)
@@ -341,7 +344,10 @@ private[spark] class TaskSchedulerImpl(
         case Some(taskSet) =>
           if (TaskState.isFinished(state)) {
             taskIdToTaskSetManager.remove(tid)
-            taskIdToExecutorId.remove(tid)
+            taskIdToExecutorId.remove(tid) match {
+              case Some(execId) => activeExecutorIdsWithLoads(execId) -= 1
+              case None =>
+            }
           }
           if (state == TaskState.FINISHED) {
             taskSet.removeRunningTask(tid)

Contributor: …

Author: I will change that.
@@ -462,7 +468,7 @@ private[spark] class TaskSchedulerImpl(
     var failedExecutor: Option[String] = None

     synchronized {
-      if (activeExecutorIds.contains(executorId)) {
+      if (activeExecutorIdsWithLoads.contains(executorId)) {
         val hostPort = executorIdToHost(executorId)
         logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason))
         removeExecutor(executorId, reason)
@@ -484,7 +490,8 @@ private[spark] class TaskSchedulerImpl(
   /** Remove an executor from all our data structures and mark it as lost */
   private def removeExecutor(executorId: String, reason: ExecutorLossReason) {
-    activeExecutorIds -= executorId
+    activeExecutorIdsWithLoads -= executorId

     val host = executorIdToHost(executorId)
     val execs = executorsByHost.getOrElse(host, new HashSet)
     execs -= executorId
@@ -518,7 +525,11 @@ private[spark] class TaskSchedulerImpl(
   }

   def isExecutorAlive(execId: String): Boolean = synchronized {
-    activeExecutorIds.contains(execId)
+    activeExecutorIdsWithLoads.contains(execId)
   }

+  def isExecutorBusy(execId: String): Boolean = synchronized {
+    activeExecutorIdsWithLoads.getOrElse(execId, -1) > 0
+  }
+
   // By default, rack is unknown
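For context, here is a minimal self-contained Scala sketch (not Spark code) of the bookkeeping pattern the TaskSchedulerImpl changes above implement: a per-executor running-task count that is created on a resource offer, incremented on task launch, decremented on task completion, and consulted to decide whether an executor is busy. All names in the sketch are hypothetical stand-ins.

```scala
import scala.collection.mutable.HashMap

// Sketch of the per-executor load tracking mirrored by activeExecutorIdsWithLoads.
class TaskLoadTracker {
  private val loads = new HashMap[String, Int]

  // Register an executor with zero running tasks (resource-offer path).
  def executorSeen(execId: String): Unit = { loads.getOrElseUpdate(execId, 0); () }

  // A task was launched on the executor (assumes executorSeen was called first).
  def taskLaunched(execId: String): Unit = loads(execId) += 1

  // A task finished, successfully or not, on the executor.
  def taskFinished(execId: String): Unit = loads(execId) -= 1

  // The executor disappeared; drop its entry entirely.
  def executorRemoved(execId: String): Unit = loads -= execId

  def isAlive(execId: String): Boolean = loads.contains(execId)

  // Busy means at least one task is still running; unknown executors are not busy.
  def isBusy(execId: String): Boolean = loads.getOrElse(execId, -1) > 0
}

object TaskLoadTrackerDemo extends App {
  val tracker = new TaskLoadTracker
  tracker.executorSeen("exec-1")
  tracker.taskLaunched("exec-1")
  println(tracker.isBusy("exec-1")) // true: one running task
  tracker.taskFinished("exec-1")
  println(tracker.isBusy("exec-1")) // false: idle, safe to kill without force
}
```

With this bookkeeping in place, an executor whose count is zero is the only kind that a non-forced kill request should touch.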
CoarseGrainedSchedulerBackend.scala

@@ -410,8 +410,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
    * Request that the cluster manager kill the specified executors.
    * @return whether the kill request is acknowledged.
    */
-  final override def killExecutors(executorIds: Seq[String]): Boolean = synchronized {
-    killExecutors(executorIds, replace = false)
+  final override def killExecutors(
+      executorIds: Seq[String],
+      force: Boolean): Boolean = synchronized {
+    killExecutors(executorIds, replace = false, force)
   }

   /**
@@ -421,15 +423,29 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
    * @param replace whether to replace the killed executors with new ones
    * @return whether the kill request is acknowledged.
    */
-  final def killExecutors(executorIds: Seq[String], replace: Boolean): Boolean = synchronized {
+  final def killExecutors(
+      executorIds: Seq[String],
+      replace: Boolean,
+      force: Boolean): Boolean = synchronized {
     logInfo(s"Requesting to kill executor(s) ${executorIds.mkString(", ")}")
     val (knownExecutors, unknownExecutors) = executorIds.partition(executorDataMap.contains)
     unknownExecutors.foreach { id =>
       logWarning(s"Executor to kill $id does not exist!")
     }

+    // force killing all busy and idle executors
+    // otherwise, only idle executors are valid to be killed
+    val idleExecutors =
+      if (force) {
+        knownExecutors
+      } else {
+        knownExecutors.filter { id =>
+          logWarning(s"Busy executor $id is not valid to be killed!")
+          !scheduler.isExecutorBusy(id)}
+      }
+
     // If an executor is already pending to be removed, do not kill it again (SPARK-9795)
-    val executorsToKill = knownExecutors.filter { id => !executorsPendingToRemove.contains(id) }
+    val executorsToKill = idleExecutors.filter { id => !executorsPendingToRemove.contains(id) }
     executorsPendingToRemove ++= executorsToKill

     // If we do not wish to replace the executors we kill, sync the target number of executors
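To make the effect of the new `force` parameter concrete, here is a small standalone sketch (not the patch itself) of the idle-versus-busy filtering decision above. `isBusy` stands in for `TaskSchedulerImpl.isExecutorBusy`; the object name, the sample data, and `selectKillable` are hypothetical.

```scala
// Standalone illustration of the force/idle filtering step in killExecutors.
object KillFilterDemo extends App {
  // Hypothetical running-task counts per executor.
  val running = Map("exec-1" -> 2, "exec-2" -> 0, "exec-3" -> 1)
  def isBusy(id: String): Boolean = running.getOrElse(id, -1) > 0

  def selectKillable(requested: Seq[String], force: Boolean): Seq[String] =
    if (force) requested                 // force: busy executors are killed too
    else requested.filterNot { id =>     // otherwise keep only idle executors
      val busy = isBusy(id)
      if (busy) println(s"Busy executor $id is not valid to be killed!")
      busy
    }

  println(selectKillable(Seq("exec-1", "exec-2", "exec-3"), force = false)) // List(exec-2)
  println(selectKillable(Seq("exec-1", "exec-2", "exec-3"), force = true))  // all three
}
```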
@@ -442,6 +458,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
       numPendingExecutors += knownExecutors.size
     }

+    // executorsToKill may be empty
     doKillExecutors(executorsToKill)
   }
vanzin: Instead of fixing the list of pending executors here, I wonder if it wouldn't be better to change `removeExecutor` to return `false` when the executors asked to be killed were busy? Then they wouldn't even be added to this list to start with.

That changes the semantics of the return value slightly, but it also sounds more correct. With your changes, `killExecutors(..., force = false)` will return `true` even if it didn't kill any executors, which sounds wrong.
Author: @vanzin, in the original design I did change the return value of that function (`killExecutors`), and not only because of the last round of review comments; it is also still a little bit strange. For example, say you have 3 executors to kill with force=false and you find one of them is busy: it is hard to tell directly whether the kill succeeded or not. But if we only support a single executor here, it is much simpler and more straightforward.

Besides, this was changed according to the last round of review comments, since `killExecutors` only returns the acknowledgement (per the documentation), which doesn't indicate the status of the kill action. Please let me know your further thoughts.
vanzin: While I agree with what you say, the current return value is both not very useful and really not in line with what the documentation says. It basically means "a message was sent to the cluster manager asking the executors to be killed". It doesn't mean the cluster manager received the message, nor whether it successfully acted on it.

So IMO it should be fine to change the meaning of the return value of `killExecutors` slightly; it would make the return value slightly more useful.

Also, that makes me question whether your current code really works. If the executor ID is in the `executorsPendingToRemove` list, it means a request to kill that executor has already been sent to the cluster manager. Meaning that even if you remove the executor from this list, the cluster manager will still kill it. Which makes my suggestion of not sending the kill request even more important.

I see what the race is, but once the request is sent to the cluster manager, it's too late to try to fix things. So the only enhancement I see is if you're able to avoid sending the request in the first place.
Author: @vanzin Here is the code path. […] `executorsPendingToRemove`. There is no way to tell which executor is actually going to be killed. That is why we need this kind of rescuing. Please let me know if it makes sense.
vanzin: But what are you rescuing? You're removing the executor from the "pending to remove" list, but the request to kill the executor has already been sent, otherwise it would not be there. So the executor will still be killed, even if you remove it from this list.
Author: @vanzin I got a little bit confused. If at least one executor was killed and `true` is returned, then all those executors will be added to `executorsPendingToRemove`; see spark/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala, line 405 in f85aa06.
vanzin: Look at that code again. That code is calling `killExecutor`, NOT `killExecutors`. There is a single executor involved!
Author: OK, I see. You mean changing the `killExecutor` return value only, right?
vanzin: Yes, I mean changing the return value of `killExecutor`. But since `killExecutor` is implemented as a call to `killExecutors`, plural, with a list containing a single executor, you have to change the return value of `killExecutors`.
Author: OK. I will change that accordingly. It will also change the original semantics.