Commit d0b5633

JoshRosen authored and davies committed
[SPARK-11307] Reduce memory consumption of OutputCommitCoordinator

OutputCommitCoordinator uses a map in a place where an array would suffice, increasing its memory consumption for result stages with millions of tasks. This patch replaces that map with an array. The only tricky part of this is reasoning about the range of possible array indexes in order to make sure that we never index out of bounds.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #9274 from JoshRosen/SPARK-11307.
1 parent a752dda commit d0b5633
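
The saving comes from swapping the per-stage mutable map of partition id to task attempt for a primitive Int array indexed by partition id, with -1 as a sentinel meaning "no authorized committer". A minimal standalone sketch of the two representations (illustrative only; the type aliases and sentinel mirror the diff below):

import scala.collection.mutable

object CommitterTableSketch {
  type PartitionId = Int
  type TaskAttemptNumber = Int
  val NO_AUTHORIZED_COMMITTER: TaskAttemptNumber = -1

  // Before: a hash map pays for boxed keys, boxed values, and bucket
  // structure on every entry, which adds up over millions of partitions.
  def mapTable(): mutable.Map[PartitionId, TaskAttemptNumber] =
    mutable.HashMap[PartitionId, TaskAttemptNumber]()

  // After: partition ids are dense integers in [0, maxPartitionId], so a
  // flat Int array (4 bytes per slot) holds the same information; the only
  // proof obligation is that no index ever exceeds maxPartitionId.
  def arrayTable(maxPartitionId: Int): Array[TaskAttemptNumber] = {
    val arr = new Array[TaskAttemptNumber](maxPartitionId + 1)
    java.util.Arrays.fill(arr, NO_AUTHORIZED_COMMITTER)
    arr
  }
}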

File tree

3 files changed: +34 -16 lines changed

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 7 additions & 1 deletion

@@ -947,7 +947,13 @@ class DAGScheduler(
     // serializable. If tasks are not serializable, a SparkListenerStageCompleted event
     // will be posted, which should always come after a corresponding SparkListenerStageSubmitted
     // event.
-    outputCommitCoordinator.stageStart(stage.id)
+    stage match {
+      case s: ShuffleMapStage =>
+        outputCommitCoordinator.stageStart(stage = s.id, maxPartitionId = s.numPartitions - 1)
+      case s: ResultStage =>
+        outputCommitCoordinator.stageStart(
+          stage = s.id, maxPartitionId = s.rdd.partitions.length - 1)
+    }
     val taskIdToLocations: Map[Int, Seq[TaskLocation]] = try {
       stage match {
         case s: ShuffleMapStage =>

core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala

Lines changed: 26 additions & 14 deletions

@@ -47,6 +47,8 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean)
   private type PartitionId = Int
   private type TaskAttemptNumber = Int

+  private val NO_AUTHORIZED_COMMITTER: TaskAttemptNumber = -1
+
   /**
    * Map from active stages's id => partition id => task attempt with exclusive lock on committing
    * output for that partition.
@@ -56,9 +58,7 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean)
    *
    * Access to this map should be guarded by synchronizing on the OutputCommitCoordinator instance.
    */
-  private val authorizedCommittersByStage: CommittersByStageMap = mutable.Map()
-  private type CommittersByStageMap =
-    mutable.Map[StageId, mutable.Map[PartitionId, TaskAttemptNumber]]
+  private val authorizedCommittersByStage = mutable.Map[StageId, Array[TaskAttemptNumber]]()

   /**
    * Returns whether the OutputCommitCoordinator's internal data structures are all empty.
@@ -95,9 +95,21 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean)
     }
   }

-  // Called by DAGScheduler
-  private[scheduler] def stageStart(stage: StageId): Unit = synchronized {
-    authorizedCommittersByStage(stage) = mutable.HashMap[PartitionId, TaskAttemptNumber]()
+  /**
+   * Called by the DAGScheduler when a stage starts.
+   *
+   * @param stage the stage id.
+   * @param maxPartitionId the maximum partition id that could appear in this stage's tasks (i.e.
+   *                       the maximum possible value of `context.partitionId`).
+   */
+  private[scheduler] def stageStart(
+      stage: StageId,
+      maxPartitionId: Int): Unit = {
+    val arr = new Array[TaskAttemptNumber](maxPartitionId + 1)
+    java.util.Arrays.fill(arr, NO_AUTHORIZED_COMMITTER)
+    synchronized {
+      authorizedCommittersByStage(stage) = arr
+    }
   }

   // Called by DAGScheduler
@@ -122,10 +134,10 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean)
         logInfo(s"Task was denied committing, stage: $stage, partition: $partition, " +
           s"attempt: $attemptNumber")
       case otherReason =>
-        if (authorizedCommitters.get(partition).exists(_ == attemptNumber)) {
+        if (authorizedCommitters(partition) == attemptNumber) {
           logDebug(s"Authorized committer (attemptNumber=$attemptNumber, stage=$stage, " +
             s"partition=$partition) failed; clearing lock")
-          authorizedCommitters.remove(partition)
+          authorizedCommitters(partition) = NO_AUTHORIZED_COMMITTER
         }
     }
   }
@@ -145,16 +157,16 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean)
       attemptNumber: TaskAttemptNumber): Boolean = synchronized {
     authorizedCommittersByStage.get(stage) match {
       case Some(authorizedCommitters) =>
-        authorizedCommitters.get(partition) match {
-          case Some(existingCommitter) =>
-            logDebug(s"Denying attemptNumber=$attemptNumber to commit for stage=$stage, " +
-              s"partition=$partition; existingCommitter = $existingCommitter")
-            false
-          case None =>
+        authorizedCommitters(partition) match {
+          case NO_AUTHORIZED_COMMITTER =>
             logDebug(s"Authorizing attemptNumber=$attemptNumber to commit for stage=$stage, " +
               s"partition=$partition")
             authorizedCommitters(partition) = attemptNumber
             true
+          case existingCommitter =>
+            logDebug(s"Denying attemptNumber=$attemptNumber to commit for stage=$stage, " +
+              s"partition=$partition; existingCommitter = $existingCommitter")
+            false
         }
       case None =>
         logDebug(s"Stage $stage has completed, so not allowing attempt number $attemptNumber of" +

core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala

Lines changed: 1 addition & 1 deletion

@@ -171,7 +171,7 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter {
     val partition: Int = 2
     val authorizedCommitter: Int = 3
     val nonAuthorizedCommitter: Int = 100
-    outputCommitCoordinator.stageStart(stage)
+    outputCommitCoordinator.stageStart(stage, maxPartitionId = 2)

     assert(outputCommitCoordinator.canCommit(stage, partition, authorizedCommitter))
     assert(!outputCommitCoordinator.canCommit(stage, partition, nonAuthorizedCommitter))
