-
Notifications
You must be signed in to change notification settings - Fork 51
[SPARK-25299] Propose a new NIO transfer API for partition writing. #535
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
7e13ed1
81e8a86
53f6bbd
9b77268
0dd4ffa
e98661e
1faf980
d12a86c
5ae75de
ce68613
f3dac6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.api.shuffle; | ||
|
|
||
| import java.io.IOException; | ||
| import java.nio.channels.WritableByteChannel; | ||
|
|
||
| public class DefaultTransferrableWritableByteChannel implements TransferrableWritableByteChannel { | ||
|
|
||
| private final WritableByteChannel delegate; | ||
|
|
||
| public DefaultTransferrableWritableByteChannel(WritableByteChannel delegate) { | ||
| this.delegate = delegate; | ||
| } | ||
|
|
||
| @Override | ||
| public void transferFrom( | ||
| TransferrableReadableByteChannel source, long numBytesToTransfer) throws IOException { | ||
| source.transferTo(delegate, numBytesToTransfer); | ||
| } | ||
|
|
||
| @Override | ||
| public void close() throws IOException { | ||
| delegate.close(); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.api.shuffle; | ||
|
|
||
| import java.io.IOException; | ||
| import java.nio.channels.Channels; | ||
|
|
||
| import org.apache.spark.annotation.Experimental; | ||
|
|
||
| /** | ||
| * :: Experimental :: | ||
| * Indicates that partition writers can transfer bytes directly from input byte channels to | ||
| * output channels that stream data to the underlying shuffle partition storage medium. | ||
| * <p> | ||
| * This API is separated out from ShufflePartitionWriter because it only needs to be used for | ||
| * specific low-level optimizations. | ||
| * | ||
| * @since 3.0.0 | ||
| */ | ||
| @Experimental | ||
| public interface SupportsTransferTo extends ShufflePartitionWriter { | ||
|
|
||
| /** | ||
| * Opens and returns a {@link TransferrableWritableByteChannel} for transferring bytes from | ||
| * partial input byte channels to the underlying shuffle data store. | ||
|
||
| */ | ||
| default TransferrableWritableByteChannel openTransferrableChannel() throws IOException { | ||
| return new DefaultTransferrableWritableByteChannel(Channels.newChannel(openStream())); | ||
|
||
| } | ||
|
|
||
| /** | ||
| * Returns the number of bytes written either by this writer's output stream opened by | ||
| * {@link #openStream()} or the byte channel opened by {@link #openTransferrableChannel()}. | ||
| */ | ||
| @Override | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this getting overridden? Is it for the Javadoc?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it's explicitly for the Javadoc - we have to specifically say that the count has to take into account the channel if it was opened. |
||
| long getNumBytesWritten(); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.api.shuffle; | ||
|
|
||
| import java.io.IOException; | ||
| import java.nio.channels.WritableByteChannel; | ||
| import org.apache.spark.annotation.Experimental; | ||
|
|
||
| /** | ||
| * :: Experimental :: | ||
| * Represents a readable byte channel, where its bytes can be transferred directly to some | ||
| * {@link WritableByteChannel} instance. | ||
| * <p> | ||
| * Shuffle plugin implementations should not need to implement this, but an instance of this is | ||
| * passed in for transferring bytes to output byte channels in | ||
| * {@link TransferrableWritableByteChannel}. | ||
| * | ||
| * @since 3.0.0 | ||
| */ | ||
| @Experimental | ||
| public interface TransferrableReadableByteChannel { | ||
|
|
||
| /** | ||
| * Transfer {@code numBytesToTransfer} bytes from this input channel to the given {@link WritableByteChannel}. | ||
| */ | ||
| void transferTo(WritableByteChannel outputChannel, long numBytesToTransfer) throws IOException; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.api.shuffle; | ||
|
|
||
| import java.io.Closeable; | ||
| import java.io.IOException; | ||
|
|
||
| import org.apache.spark.annotation.Experimental; | ||
|
|
||
| /** | ||
| * :: Experimental :: | ||
| * Represents an output byte channel that can copy bytes from readable byte channels to some | ||
| * arbitrary storage system. | ||
| * | ||
| * @since 3.0.0 | ||
| */ | ||
| @Experimental | ||
| public interface TransferrableWritableByteChannel extends Closeable { | ||
|
|
||
| /** | ||
| * Copy {@code numBytesToTransfer} bytes from the source readable byte channel into this byte channel. | ||
| */ | ||
| void transferFrom( | ||
| TransferrableReadableByteChannel source, long numBytesToTransfer) throws IOException; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,9 +22,11 @@ | |
| import java.io.IOException; | ||
| import java.io.OutputStream; | ||
| import java.nio.channels.FileChannel; | ||
| import java.nio.channels.WritableByteChannel; | ||
| import javax.annotation.Nullable; | ||
|
|
||
| import org.apache.spark.api.shuffle.SupportsTransferTo; | ||
| import org.apache.spark.api.shuffle.TransferrableReadableByteChannel; | ||
| import org.apache.spark.api.shuffle.TransferrableWritableByteChannel; | ||
| import scala.None$; | ||
| import scala.Option; | ||
| import scala.Product2; | ||
|
|
@@ -202,39 +204,37 @@ private long[] writePartitionedData(ShuffleMapOutputWriter mapOutputWriter) thro | |
| for (int i = 0; i < numPartitions; i++) { | ||
| final File file = partitionWriterSegments[i].file(); | ||
| boolean copyThrewException = true; | ||
| ShufflePartitionWriter writer = null; | ||
| try { | ||
| writer = mapOutputWriter.getNextPartitionWriter(); | ||
| if (!file.exists()) { | ||
| copyThrewException = false; | ||
| } else { | ||
| if (transferToEnabled) { | ||
| WritableByteChannel outputChannel = writer.toChannel(); | ||
| FileInputStream in = new FileInputStream(file); | ||
| try (FileChannel inputChannel = in.getChannel()) { | ||
| Utils.copyFileStreamNIO(inputChannel, outputChannel, 0, inputChannel.size()); | ||
| copyThrewException = false; | ||
| } finally { | ||
| Closeables.close(in, copyThrewException); | ||
| } | ||
| } else { | ||
| OutputStream tempOutputStream = writer.toStream(); | ||
| FileInputStream in = new FileInputStream(file); | ||
| try { | ||
| Utils.copyStream(in, tempOutputStream, false, false); | ||
| copyThrewException = false; | ||
| } finally { | ||
| Closeables.close(in, copyThrewException); | ||
| } | ||
| ShufflePartitionWriter writer = mapOutputWriter.getNextPartitionWriter(); | ||
| if (file.exists()) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think you're missing the case for if
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The value of |
||
| if (transferToEnabled && writer instanceof SupportsTransferTo) { | ||
| FileInputStream in = new FileInputStream(file); | ||
| TransferrableWritableByteChannel outputChannel = null; | ||
| try (FileChannel inputChannel = in.getChannel()) { | ||
| outputChannel = ((SupportsTransferTo) writer).openTransferrableChannel(); | ||
| TransferrableReadableByteChannel inputTransferable = | ||
| new FileTransferrableReadableByteChannel(inputChannel, 0L); | ||
|
||
| outputChannel.transferFrom(inputTransferable, inputChannel.size()); | ||
|
||
| copyThrewException = false; | ||
| } finally { | ||
| Closeables.close(in, copyThrewException); | ||
| Closeables.close(outputChannel, copyThrewException); | ||
| } | ||
| if (!file.delete()) { | ||
| logger.error("Unable to delete file for partition {}", i); | ||
| } else { | ||
| FileInputStream in = new FileInputStream(file); | ||
| OutputStream outputStream = null; | ||
| try { | ||
| outputStream = writer.openStream(); | ||
| Utils.copyStream(in, outputStream, false, false); | ||
| copyThrewException = false; | ||
| } finally { | ||
| Closeables.close(in, copyThrewException); | ||
| Closeables.close(outputStream, copyThrewException); | ||
| } | ||
| } | ||
| } finally { | ||
| Closeables.close(writer, copyThrewException); | ||
| if (!file.delete()) { | ||
| logger.error("Unable to delete file for partition {}", i); | ||
| } | ||
| } | ||
|
|
||
| lengths[i] = writer.getNumBytesWritten(); | ||
| } | ||
| } finally { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.shuffle.sort; | ||
|
|
||
| import java.nio.channels.FileChannel; | ||
| import java.nio.channels.WritableByteChannel; | ||
| import org.apache.spark.api.shuffle.TransferrableReadableByteChannel; | ||
| import org.apache.spark.util.Utils; | ||
|
|
||
| public class FileTransferrableReadableByteChannel implements TransferrableReadableByteChannel { | ||
|
|
||
| private final FileChannel input; | ||
| private final long transferStartPosition; | ||
|
|
||
| public FileTransferrableReadableByteChannel(FileChannel input, long transferStartPosition) { | ||
| this.input = input; | ||
| this.transferStartPosition = transferStartPosition; | ||
| } | ||
|
|
||
| @Override | ||
| public void transferTo(WritableByteChannel outputChannel, long numBytesToTransfer) { | ||
| Utils.copyFileStreamNIO(input, outputChannel, transferStartPosition, numBytesToTransfer); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move this out of the api package and into `shuffle.sort`, along with the other implementation classes.