Skip to content

Commit 58fbdf6

Browse files
CodingCat and srowen
authored and committed
[SPARK-13803] restore the changes in SPARK-3411
## What changes were proposed in this pull request? This patch contains the functionality to balance the load of the cluster-mode drivers among workers. It restores the changes in #1106, which were erased due to the merging of #731. ## How was this patch tested? Tested with existing test cases. Author: CodingCat <[email protected]> Closes #11702 from CodingCat/SPARK-13803. (cherry picked from commit bd5365b) Signed-off-by: Sean Owen <[email protected]>
1 parent 7a24d94 commit 58fbdf6

File tree

1 file changed

+17
-4
lines changed
  • core/src/main/scala/org/apache/spark/deploy/master

1 file changed

+17
-4
lines changed

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -683,15 +683,28 @@ private[deploy] class Master(
683683
* every time a new app joins or resource availability changes.
684684
*/
685685
private def schedule(): Unit = {
686-
if (state != RecoveryState.ALIVE) { return }
686+
if (state != RecoveryState.ALIVE) {
687+
return
688+
}
687689
// Drivers take strict precedence over executors
688-
val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
689-
for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
690-
for (driver <- waitingDrivers) {
690+
val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
691+
val numWorkersAlive = shuffledAliveWorkers.size
692+
var curPos = 0
693+
for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
694+
// We assign workers to each waiting driver in a round-robin fashion. For each driver, we
695+
// start from the last worker that was assigned a driver, and continue onwards until we have
696+
// explored all alive workers.
697+
var launched = false
698+
var numWorkersVisited = 0
699+
while (numWorkersVisited < numWorkersAlive && !launched) {
700+
val worker = shuffledAliveWorkers(curPos)
701+
numWorkersVisited += 1
691702
if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
692703
launchDriver(worker, driver)
693704
waitingDrivers -= driver
705+
launched = true
694706
}
707+
curPos = (curPos + 1) % numWorkersAlive
695708
}
696709
}
697710
startExecutorsOnWorkers()

0 commit comments

Comments
 (0)