-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25341][Core] Support rolling back a shuffle map stage and re-generate the shuffle files #25620
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-25341][Core] Support rolling back a shuffle map stage and re-generate the shuffle files #25620
Changes from 1 commit
bbce8b4
578c233
cb612e5
f4471b2
b31d1f5
ff8fde9
2bb4388
061e363
212b201
0d91544
da73b56
69d59a1
c86f6cc
d2215b2
28c9f9c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,9 +49,10 @@ import org.apache.spark.util.{CompletionIterator, TaskCompletionListener, Utils} | |
| * @param shuffleClient [[BlockStoreClient]] for fetching remote blocks | ||
| * @param blockManager [[BlockManager]] for reading local blocks | ||
| * @param blocksByAddress list of blocks to fetch grouped by the [[BlockManagerId]]. | ||
| * For each block we also require the size (in bytes as a long field) in | ||
| * order to throttle the memory usage. Note that zero-sized blocks are | ||
| * already excluded, which happened in | ||
| * For each block we also require two pieces of information: 1. the size | ||
| * (in bytes, as a long field) in order to throttle the memory usage; 2. the | ||
| * mapId for this block, which indicates the block's index in the map stage. | ||
| * Note that zero-sized blocks are already excluded, which happened in | ||
| * [[org.apache.spark.MapOutputTracker.convertMapStatuses]]. | ||
| * @param streamWrapper A function to wrap the returned input stream. | ||
| * @param maxBytesInFlight max size (in bytes) of remote blocks to fetch at any given point. | ||
|
|
@@ -67,7 +68,7 @@ final class ShuffleBlockFetcherIterator( | |
| context: TaskContext, | ||
| shuffleClient: BlockStoreClient, | ||
| blockManager: BlockManager, | ||
| blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long)])], | ||
| blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], | ||
| streamWrapper: (BlockId, InputStream) => InputStream, | ||
| maxBytesInFlight: Long, | ||
| maxReqsInFlight: Int, | ||
|
|
@@ -97,7 +98,7 @@ final class ShuffleBlockFetcherIterator( | |
| private[this] val startTimeNs = System.nanoTime() | ||
|
|
||
| /** Local blocks to fetch, excluding zero-sized blocks. */ | ||
| private[this] val localBlocks = scala.collection.mutable.LinkedHashSet[BlockId]() | ||
| private[this] val localBlocks = scala.collection.mutable.LinkedHashSet[(BlockId, Int)]() | ||
|
|
||
| /** Remote blocks to fetch, excluding zero-sized blocks. */ | ||
| private[this] val remoteBlocks = new HashSet[BlockId]() | ||
|
|
@@ -199,7 +200,7 @@ final class ShuffleBlockFetcherIterator( | |
| while (iter.hasNext) { | ||
| val result = iter.next() | ||
| result match { | ||
| case SuccessFetchResult(_, address, _, buf, _) => | ||
| case SuccessFetchResult(_, _, address, _, buf, _) => | ||
| if (address != blockManager.blockManagerId) { | ||
| shuffleMetrics.incRemoteBytesRead(buf.size) | ||
| if (buf.isInstanceOf[FileSegmentManagedBuffer]) { | ||
|
|
@@ -224,9 +225,11 @@ final class ShuffleBlockFetcherIterator( | |
| bytesInFlight += req.size | ||
| reqsInFlight += 1 | ||
|
|
||
| // so we can look up the size of each blockID | ||
| val sizeMap = req.blocks.map { case (blockId, size) => (blockId.toString, size) }.toMap | ||
| val remainingBlocks = new HashSet[String]() ++= sizeMap.keys | ||
| // so we can look up the block info of each blockID | ||
| val infoMap = req.blocks.map { | ||
| case (blockId, size, mapId) => (blockId.toString, (size, mapId)) | ||
| }.toMap | ||
| val remainingBlocks = new HashSet[String]() ++= infoMap.keys | ||
| val blockIds = req.blocks.map(_._1.toString) | ||
| val address = req.address | ||
|
|
||
|
|
@@ -240,8 +243,8 @@ final class ShuffleBlockFetcherIterator( | |
| // This needs to be released after use. | ||
| buf.retain() | ||
| remainingBlocks -= blockId | ||
| results.put(new SuccessFetchResult(BlockId(blockId), address, sizeMap(blockId), buf, | ||
| remainingBlocks.isEmpty)) | ||
| results.put(new SuccessFetchResult(BlockId(blockId), infoMap(blockId)._2, | ||
| address, infoMap(blockId)._1, buf, remainingBlocks.isEmpty)) | ||
| logDebug("remainingBlocks: " + remainingBlocks) | ||
| } | ||
| } | ||
|
|
@@ -250,7 +253,7 @@ final class ShuffleBlockFetcherIterator( | |
|
|
||
| override def onBlockFetchFailure(blockId: String, e: Throwable): Unit = { | ||
| logError(s"Failed to get block(s) from ${req.address.host}:${req.address.port}", e) | ||
| results.put(new FailureFetchResult(BlockId(blockId), address, e)) | ||
| results.put(new FailureFetchResult(BlockId(blockId), infoMap(blockId)._2, address, e)) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -283,28 +286,28 @@ final class ShuffleBlockFetcherIterator( | |
| for ((address, blockInfos) <- blocksByAddress) { | ||
| if (address.executorId == blockManager.blockManagerId.executorId) { | ||
| blockInfos.find(_._2 <= 0) match { | ||
| case Some((blockId, size)) if size < 0 => | ||
| case Some((blockId, size, _)) if size < 0 => | ||
| throw new BlockException(blockId, "Negative block size " + size) | ||
| case Some((blockId, size)) if size == 0 => | ||
| case Some((blockId, size, _)) if size == 0 => | ||
| throw new BlockException(blockId, "Zero-sized blocks should be excluded.") | ||
| case None => // do nothing. | ||
| } | ||
| localBlocks ++= blockInfos.map(_._1) | ||
| localBlocks ++= blockInfos.map(info => (info._1, info._3)) | ||
| localBlockBytes += blockInfos.map(_._2).sum | ||
| numBlocksToFetch += localBlocks.size | ||
| } else { | ||
| val iterator = blockInfos.iterator | ||
| var curRequestSize = 0L | ||
| var curBlocks = new ArrayBuffer[(BlockId, Long)] | ||
| var curBlocks = new ArrayBuffer[(BlockId, Long, Int)] | ||
| while (iterator.hasNext) { | ||
| val (blockId, size) = iterator.next() | ||
| val (blockId, size, mapId) = iterator.next() | ||
| remoteBlockBytes += size | ||
| if (size < 0) { | ||
| throw new BlockException(blockId, "Negative block size " + size) | ||
| } else if (size == 0) { | ||
| throw new BlockException(blockId, "Zero-sized blocks should be excluded.") | ||
| } else { | ||
| curBlocks += ((blockId, size)) | ||
| curBlocks += ((blockId, size, mapId)) | ||
| remoteBlocks += blockId | ||
| numBlocksToFetch += 1 | ||
| curRequestSize += size | ||
|
|
@@ -315,7 +318,7 @@ final class ShuffleBlockFetcherIterator( | |
| remoteRequests += new FetchRequest(address, curBlocks) | ||
| logDebug(s"Creating fetch request of $curRequestSize at $address " | ||
| + s"with ${curBlocks.size} blocks") | ||
| curBlocks = new ArrayBuffer[(BlockId, Long)] | ||
| curBlocks = new ArrayBuffer[(BlockId, Long, Int)] | ||
| curRequestSize = 0 | ||
| } | ||
| } | ||
|
|
@@ -341,13 +344,13 @@ final class ShuffleBlockFetcherIterator( | |
| logDebug(s"Start fetching local blocks: ${localBlocks.mkString(", ")}") | ||
| val iter = localBlocks.iterator | ||
| while (iter.hasNext) { | ||
| val blockId = iter.next() | ||
| val (blockId, mapId) = iter.next() | ||
| try { | ||
| val buf = blockManager.getBlockData(blockId) | ||
| shuffleMetrics.incLocalBlocksFetched(1) | ||
| shuffleMetrics.incLocalBytesRead(buf.size) | ||
| buf.retain() | ||
| results.put(new SuccessFetchResult(blockId, blockManager.blockManagerId, | ||
| results.put(new SuccessFetchResult(blockId, mapId, blockManager.blockManagerId, | ||
| buf.size(), buf, false)) | ||
| } catch { | ||
| // If we see an exception, stop immediately. | ||
|
|
@@ -360,7 +363,7 @@ final class ShuffleBlockFetcherIterator( | |
| logError("Error occurred while fetching local blocks, " + ce.getMessage) | ||
| case ex: Exception => logError("Error occurred while fetching local blocks", ex) | ||
| } | ||
| results.put(new FailureFetchResult(blockId, blockManager.blockManagerId, e)) | ||
| results.put(new FailureFetchResult(blockId, mapId, blockManager.blockManagerId, e)) | ||
| return | ||
| } | ||
| } | ||
|
|
@@ -420,7 +423,7 @@ final class ShuffleBlockFetcherIterator( | |
| shuffleMetrics.incFetchWaitTime(fetchWaitTime) | ||
|
|
||
| result match { | ||
| case r @ SuccessFetchResult(blockId, address, size, buf, isNetworkReqDone) => | ||
| case r @ SuccessFetchResult(blockId, mapId, address, size, buf, isNetworkReqDone) => | ||
| if (address != blockManager.blockManagerId) { | ||
| numBlocksInFlightPerAddress(address) = numBlocksInFlightPerAddress(address) - 1 | ||
| shuffleMetrics.incRemoteBytesRead(buf.size) | ||
|
|
@@ -429,7 +432,7 @@ final class ShuffleBlockFetcherIterator( | |
| } | ||
| shuffleMetrics.incRemoteBlocksFetched(1) | ||
| } | ||
| if (!localBlocks.contains(blockId)) { | ||
| if (!localBlocks.contains((blockId, mapId))) { | ||
| bytesInFlight -= size | ||
| } | ||
| if (isNetworkReqDone) { | ||
|
|
@@ -453,7 +456,7 @@ final class ShuffleBlockFetcherIterator( | |
| // since the last call. | ||
| val msg = s"Received a zero-size buffer for block $blockId from $address " + | ||
| s"(expectedApproxSize = $size, isNetworkReqDone=$isNetworkReqDone)" | ||
| throwFetchFailedException(blockId, address, new IOException(msg)) | ||
| throwFetchFailedException(blockId, mapId, address, new IOException(msg)) | ||
| } | ||
|
|
||
| val in = try { | ||
|
|
@@ -469,7 +472,7 @@ final class ShuffleBlockFetcherIterator( | |
| case e: IOException => logError("Failed to create input stream from local block", e) | ||
| } | ||
| buf.release() | ||
| throwFetchFailedException(blockId, address, e) | ||
| throwFetchFailedException(blockId, mapId, address, e) | ||
| } | ||
| try { | ||
| input = streamWrapper(blockId, in) | ||
|
|
@@ -487,11 +490,11 @@ final class ShuffleBlockFetcherIterator( | |
| buf.release() | ||
| if (buf.isInstanceOf[FileSegmentManagedBuffer] | ||
| || corruptedBlocks.contains(blockId)) { | ||
| throwFetchFailedException(blockId, address, e) | ||
| throwFetchFailedException(blockId, mapId, address, e) | ||
| } else { | ||
| logWarning(s"got an corrupted block $blockId from $address, fetch again", e) | ||
| corruptedBlocks += blockId | ||
| fetchRequests += FetchRequest(address, Array((blockId, size))) | ||
| fetchRequests += FetchRequest(address, Array((blockId, size, mapId))) | ||
| result = null | ||
| } | ||
| } finally { | ||
|
|
@@ -503,8 +506,8 @@ final class ShuffleBlockFetcherIterator( | |
| } | ||
| } | ||
|
|
||
| case FailureFetchResult(blockId, address, e) => | ||
| throwFetchFailedException(blockId, address, e) | ||
| case FailureFetchResult(blockId, mapId, address, e) => | ||
| throwFetchFailedException(blockId, mapId, address, e) | ||
| } | ||
|
|
||
| // Send fetch requests up to maxBytesInFlight | ||
|
|
@@ -517,6 +520,7 @@ final class ShuffleBlockFetcherIterator( | |
| input, | ||
| this, | ||
| currentResult.blockId, | ||
| currentResult.mapId, | ||
| currentResult.address, | ||
| detectCorrupt && streamCompressedOrEncrypted)) | ||
| } | ||
|
|
@@ -583,10 +587,11 @@ final class ShuffleBlockFetcherIterator( | |
|
|
||
| private[storage] def throwFetchFailedException( | ||
| blockId: BlockId, | ||
| mapId: Int, | ||
| address: BlockManagerId, | ||
| e: Throwable) = { | ||
| blockId match { | ||
| case ShuffleBlockId(shufId, mapId, reduceId) => | ||
| case ShuffleBlockId(shufId, _, reduceId) => | ||
| throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e) | ||
| case _ => | ||
| throw new SparkException( | ||
|
|
@@ -604,6 +609,7 @@ private class BufferReleasingInputStream( | |
| private[storage] val delegate: InputStream, | ||
| private val iterator: ShuffleBlockFetcherIterator, | ||
| private val blockId: BlockId, | ||
| private val mapId: Int, | ||
xuanyuanking marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| private val address: BlockManagerId, | ||
| private val detectCorruption: Boolean) | ||
| extends InputStream { | ||
|
|
@@ -615,7 +621,7 @@ private class BufferReleasingInputStream( | |
| } catch { | ||
| case e: IOException if detectCorruption => | ||
| IOUtils.closeQuietly(this) | ||
| iterator.throwFetchFailedException(blockId, address, e) | ||
| iterator.throwFetchFailedException(blockId, mapId, address, e) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -637,7 +643,7 @@ private class BufferReleasingInputStream( | |
| } catch { | ||
| case e: IOException if detectCorruption => | ||
| IOUtils.closeQuietly(this) | ||
| iterator.throwFetchFailedException(blockId, address, e) | ||
| iterator.throwFetchFailedException(blockId, mapId, address, e) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -649,7 +655,7 @@ private class BufferReleasingInputStream( | |
| } catch { | ||
| case e: IOException if detectCorruption => | ||
| IOUtils.closeQuietly(this) | ||
| iterator.throwFetchFailedException(blockId, address, e) | ||
| iterator.throwFetchFailedException(blockId, mapId, address, e) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -659,7 +665,7 @@ private class BufferReleasingInputStream( | |
| } catch { | ||
| case e: IOException if detectCorruption => | ||
| IOUtils.closeQuietly(this) | ||
| iterator.throwFetchFailedException(blockId, address, e) | ||
| iterator.throwFetchFailedException(blockId, mapId, address, e) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -694,9 +700,10 @@ object ShuffleBlockFetcherIterator { | |
| * A request to fetch blocks from a remote BlockManager. | ||
| * @param address remote BlockManager to fetch from. | ||
| * @param blocks Sequence of tuple, where the first element is the block id, | ||
| * and the second element is the estimated size, used to calculate bytesInFlight. | ||
| * and the second element is the estimated size, used to calculate bytesInFlight; | ||
| * the third element is the mapId of the block. | ||
| */ | ||
| case class FetchRequest(address: BlockManagerId, blocks: Seq[(BlockId, Long)]) { | ||
| case class FetchRequest(address: BlockManagerId, blocks: Seq[(BlockId, Long, Int)]) { | ||
|
||
| val size = blocks.map(_._2).sum | ||
| } | ||
|
|
||
|
|
@@ -711,6 +718,7 @@ object ShuffleBlockFetcherIterator { | |
| /** | ||
| * Result of a fetch from a remote block successfully. | ||
| * @param blockId block id | ||
| * @param mapId mapId for this block | ||
| * @param address BlockManager that the block was fetched from. | ||
| * @param size estimated size of the block. Note that this is NOT the exact bytes. | ||
| * Size of remote block is used to calculate bytesInFlight. | ||
|
|
@@ -719,6 +727,7 @@ object ShuffleBlockFetcherIterator { | |
| */ | ||
| private[storage] case class SuccessFetchResult( | ||
| blockId: BlockId, | ||
| mapId: Int, | ||
|
||
| address: BlockManagerId, | ||
| size: Long, | ||
| buf: ManagedBuffer, | ||
|
|
@@ -730,11 +739,13 @@ object ShuffleBlockFetcherIterator { | |
| /** | ||
| * Result of a fetch from a remote block unsuccessfully. | ||
| * @param blockId block id | ||
| * @param mapId mapId for this block | ||
| * @param address BlockManager that the block was attempted to be fetched from | ||
| * @param e the failure exception | ||
| */ | ||
| private[storage] case class FailureFetchResult( | ||
| blockId: BlockId, | ||
| mapId: Int, | ||
| address: BlockManagerId, | ||
| e: Throwable) | ||
| extends FetchResult | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.