-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-34534][CORE] Fix blockIds order when use FetchShuffleBlocks to fetch blocks #31643
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
0af3b01
384f43c
0006553
006a5ec
a08ca1d
1234253
7987a25
e823930
0748d20
42b9621
575ca5e
6377217
9cf23df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,7 +81,6 @@ public OneForOneBlockFetcher( | |
| TransportConf transportConf, | ||
| DownloadFileManager downloadFileManager) { | ||
| this.client = client; | ||
| this.blockIds = blockIds; | ||
| this.listener = listener; | ||
| this.chunkCallback = new ChunkCallback(); | ||
| this.transportConf = transportConf; | ||
|
|
@@ -90,8 +89,10 @@ public OneForOneBlockFetcher( | |
| throw new IllegalArgumentException("Zero-sized blockIds array"); | ||
| } | ||
| if (!transportConf.useOldFetchProtocol() && isShuffleBlocks(blockIds)) { | ||
| this.message = createFetchShuffleBlocksMsg(appId, execId, blockIds); | ||
| this.blockIds = new String[blockIds.length]; | ||
| this.message = createFetchShuffleBlocksMsgAndBuildBlockIds(appId, execId, blockIds); | ||
| } else { | ||
| this.blockIds = blockIds; | ||
| this.message = new OpenBlocks(appId, execId, blockIds); | ||
| } | ||
| } | ||
|
|
@@ -106,41 +107,53 @@ private boolean isShuffleBlocks(String[] blockIds) { | |
| } | ||
|
|
||
| /** | ||
| * Analyze the pass in blockIds and create FetchShuffleBlocks message. | ||
| * The blockIds has been sorted by mapId and reduceId. It's produced in | ||
| * org.apache.spark.MapOutputTracker.convertMapStatuses. | ||
| * Create FetchShuffleBlocks message and rebuild internal blockIds by | ||
| * analyzing the passed-in blockIds. | ||
| */ | ||
| private FetchShuffleBlocks createFetchShuffleBlocksMsg( | ||
| private FetchShuffleBlocks createFetchShuffleBlocksMsgAndBuildBlockIds( | ||
| String appId, String execId, String[] blockIds) { | ||
| String[] firstBlock = splitBlockId(blockIds[0]); | ||
| int shuffleId = Integer.parseInt(firstBlock[1]); | ||
| boolean batchFetchEnabled = firstBlock.length == 5; | ||
|
|
||
| HashMap<Long, ArrayList<Integer>> mapIdToReduceIds = new HashMap<>(); | ||
| HashMap<Long, BlocksInfo> mapIdToBlocksInfo = new HashMap<>(); | ||
|
||
| for (String blockId : blockIds) { | ||
| String[] blockIdParts = splitBlockId(blockId); | ||
| if (Integer.parseInt(blockIdParts[1]) != shuffleId) { | ||
| throw new IllegalArgumentException("Expected shuffleId=" + shuffleId + | ||
| ", got:" + blockId); | ||
| } | ||
| long mapId = Long.parseLong(blockIdParts[2]); | ||
| if (!mapIdToReduceIds.containsKey(mapId)) { | ||
| mapIdToReduceIds.put(mapId, new ArrayList<>()); | ||
| if (!mapIdToBlocksInfo.containsKey(mapId)) { | ||
| mapIdToBlocksInfo.put(mapId, new BlocksInfo(new ArrayList<>(), new ArrayList<>())); | ||
| } | ||
| mapIdToReduceIds.get(mapId).add(Integer.parseInt(blockIdParts[3])); | ||
| BlocksInfo blocksInfoByMapId = mapIdToBlocksInfo.get(mapId); | ||
| blocksInfoByMapId.blockIds.add(blockId); | ||
| blocksInfoByMapId.reduceIds.add(Integer.parseInt(blockIdParts[3])); | ||
| if (batchFetchEnabled) { | ||
| // When we read continuous shuffle blocks in batch, we will reuse reduceIds in | ||
| // FetchShuffleBlocks to store the start and end reduce id for range | ||
| // [startReduceId, endReduceId). | ||
| assert(blockIdParts.length == 5); | ||
| mapIdToReduceIds.get(mapId).add(Integer.parseInt(blockIdParts[4])); | ||
| blocksInfoByMapId.reduceIds.add(Integer.parseInt(blockIdParts[4])); | ||
| } | ||
| } | ||
| long[] mapIds = Longs.toArray(mapIdToReduceIds.keySet()); | ||
| long[] mapIds = Longs.toArray(mapIdToBlocksInfo.keySet()); | ||
| int[][] reduceIdArr = new int[mapIds.length][]; | ||
| int blockIdIndex = 0; | ||
| for (int i = 0; i < mapIds.length; i++) { | ||
| reduceIdArr[i] = Ints.toArray(mapIdToReduceIds.get(mapIds[i])); | ||
| BlocksInfo blocksInfoByMapId = mapIdToBlocksInfo.get(mapIds[i]); | ||
| reduceIdArr[i] = Ints.toArray(blocksInfoByMapId.reduceIds); | ||
|
|
||
| // The `blockIds`'s order must be the same as the read order specified in FetchShuffleBlocks | ||
| // because the shuffle data's return order should match the `blockIds`'s order to ensure | ||
| // blockId and data match. | ||
| for (int j = 0; j < blocksInfoByMapId.blockIds.size(); j++) { | ||
| this.blockIds[blockIdIndex++] = blocksInfoByMapId.blockIds.get(j); | ||
| } | ||
| } | ||
| assert(blockIdIndex == this.blockIds.length); | ||
|
|
||
| return new FetchShuffleBlocks( | ||
| appId, execId, shuffleId, mapIds, reduceIdArr, batchFetchEnabled); | ||
| } | ||
|
|
@@ -157,6 +170,18 @@ private String[] splitBlockId(String blockId) { | |
| return blockIdParts; | ||
| } | ||
|
|
||
| /** The reduceIds and blocks in a single mapId */ | ||
| private class BlocksInfo { | ||
|
|
||
| ArrayList<Integer> reduceIds; | ||
seayoun marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ArrayList<String> blockIds; | ||
|
|
||
| public BlocksInfo(ArrayList<Integer> reduceIds, ArrayList<String> blockIds) { | ||
seayoun marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| this.reduceIds = reduceIds; | ||
| this.blockIds = blockIds; | ||
| } | ||
| } | ||
|
|
||
| /** Callback invoked on receipt of each chunk. We equate a single chunk to a single block. */ | ||
| private class ChunkCallback implements ChunkReceivedCallback { | ||
| @Override | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to remove this from the constructor, line 78 and line 69?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or, we cannot change it because it's a public class?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The blockIds will be used to create OpenBlocks or FetchShuffleBlocks later in the constructor.