first pass of comments
ifilonenko committed Mar 20, 2019
commit 460f0ea468576f9b47fdccb8f49685b1293db5e7
@@ -17,12 +17,10 @@

package org.apache.spark.shuffle.sort;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.*;

Review comment: Don't use * import

import javax.annotation.Nullable;

import org.apache.spark.api.shuffle.ShufflePartitionWriter;
import scala.None$;
import scala.Option;
import scala.Product2;
@@ -34,6 +32,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.spark.api.shuffle.ShuffleExecutorComponents;
import org.apache.spark.api.shuffle.ShuffleMapOutputWriter;
import org.apache.spark.internal.config.package$;
import org.apache.spark.Partitioner;
import org.apache.spark.ShuffleDependency;
@@ -79,9 +79,11 @@ final class BypassMergeSortShuffleWriter<K, V> extends ShuffleWriter<K, V> {
private final BlockManager blockManager;
private final Partitioner partitioner;
private final ShuffleWriteMetricsReporter writeMetrics;
private final String appId;
private final int shuffleId;
private final int mapId;
private final Serializer serializer;
private final ShuffleExecutorComponents shuffleExecutorComponents;
private final IndexShuffleBlockResolver shuffleBlockResolver;

/** Array of file writers, one for each partition */
@@ -103,70 +105,76 @@ final class BypassMergeSortShuffleWriter<K, V> extends ShuffleWriter<K, V> {
BypassMergeSortShuffleHandle<K, V> handle,
int mapId,
SparkConf conf,
ShuffleWriteMetricsReporter writeMetrics) {
ShuffleWriteMetricsReporter writeMetrics,
ShuffleExecutorComponents shuffleExecutorComponents) {

Review comment: I think you can pass in just the ShuffleWriteSupport because you won't need the reader components here.

// Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided
this.fileBufferSize = (int) (long) conf.get(package$.MODULE$.SHUFFLE_FILE_BUFFER_SIZE()) * 1024;
this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true);
this.blockManager = blockManager;
final ShuffleDependency<K, V, V> dep = handle.dependency();
this.appId = conf.getAppId();
this.mapId = mapId;
this.shuffleId = dep.shuffleId();
this.partitioner = dep.partitioner();
this.numPartitions = partitioner.numPartitions();
this.writeMetrics = writeMetrics;
this.serializer = dep.serializer();
this.shuffleBlockResolver = shuffleBlockResolver;
this.shuffleExecutorComponents = shuffleExecutorComponents;
}

@Override
public void write(Iterator<Product2<K, V>> records) throws IOException {
assert (partitionWriters == null);
if (!records.hasNext()) {
partitionLengths = new long[numPartitions];
shuffleBlockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, null);
mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
return;
}
final SerializerInstance serInstance = serializer.newInstance();
final long openStartTime = System.nanoTime();
partitionWriters = new DiskBlockObjectWriter[numPartitions];
partitionWriterSegments = new FileSegment[numPartitions];
for (int i = 0; i < numPartitions; i++) {
final Tuple2<TempShuffleBlockId, File> tempShuffleBlockIdPlusFile =
blockManager.diskBlockManager().createTempShuffleBlock();
final File file = tempShuffleBlockIdPlusFile._2();
final BlockId blockId = tempShuffleBlockIdPlusFile._1();
partitionWriters[i] =
blockManager.getDiskWriter(blockId, file, serInstance, fileBufferSize, writeMetrics);
}
// Creating the file to write to and creating a disk writer both involve interacting with
// the disk, and can take a long time in aggregate when we open many files, so should be
// included in the shuffle write time.
writeMetrics.incWriteTime(System.nanoTime() - openStartTime);

while (records.hasNext()) {
final Product2<K, V> record = records.next();
final K key = record._1();
partitionWriters[partitioner.getPartition(key)].write(key, record._2());
}
ShuffleMapOutputWriter mapOutputWriter = shuffleExecutorComponents.writes()
.createMapOutputWriter(appId, shuffleId, mapId, numPartitions);

Review comment: Quick question: can the appId actually just be passed to the ShuffleMapOutputWriter through the ShuffleDataIO? It should be part of the SparkConf and shouldn't change in the executors, right?

Author reply: It can, but it doesn't add much computation besides the above call to getAppId(), so it seems pretty unintrusive.
However, the API was built so that you call:

  public ShuffleMapOutputWriter createMapOutputWriter(
      String appId,
      int shuffleId,
      int mapId,
      int numPartitions)

so I am a bit bound by that :)

Reply: Yeah, I'm just wondering whether we need that in the API or not, since some implementations, like this refactored one that we're doing, don't necessarily need it, although all remote implementations might. @mccheah thoughts?

Reply: Yeah, we can change the API; it should be passed through ShuffleDataIO - maybe ShuffleExecutorComponents#initialize?
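To make the suggestion above concrete, here is a minimal sketch of what passing the appId through ShuffleExecutorComponents#initialize could look like, so that createMapOutputWriter no longer takes an appId parameter. The initialize signature and the trimmed createMapOutputWriter are assumptions about a possible API change, not the interfaces as they exist in this PR.

import java.io.IOException;

// Hypothetical shape of the API change discussed above: the appId is delivered
// once per executor, and the per-map-task call drops it.
interface ShuffleExecutorComponents {
  // Called once on each executor before any shuffle writers are created.
  void initialize(String appId);

  ShuffleWriteSupport writes();
}

interface ShuffleWriteSupport {
  // appId removed from the per-map call; implementations that need it can
  // capture it from initialize(...) above.
  ShuffleMapOutputWriter createMapOutputWriter(
      int shuffleId,
      int mapId,
      int numPartitions) throws IOException;
}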

try {
if (!records.hasNext()) {
partitionLengths = new long[numPartitions];
mapOutputWriter.commitAllPartitions();
mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
return;
}
final SerializerInstance serInstance = serializer.newInstance();
final long openStartTime = System.nanoTime();
partitionWriters = new DiskBlockObjectWriter[numPartitions];
partitionWriterSegments = new FileSegment[numPartitions];
for (int i = 0; i < numPartitions; i++) {
final Tuple2<TempShuffleBlockId, File> tempShuffleBlockIdPlusFile =
blockManager.diskBlockManager().createTempShuffleBlock();
final File file = tempShuffleBlockIdPlusFile._2();
final BlockId blockId = tempShuffleBlockIdPlusFile._1();
partitionWriters[i] =
blockManager.getDiskWriter(blockId, file, serInstance, fileBufferSize, writeMetrics);
}
// Creating the file to write to and creating a disk writer both involve interacting with
// the disk, and can take a long time in aggregate when we open many files, so should be
// included in the shuffle write time.
writeMetrics.incWriteTime(System.nanoTime() - openStartTime);

for (int i = 0; i < numPartitions; i++) {
try (DiskBlockObjectWriter writer = partitionWriters[i]) {
partitionWriterSegments[i] = writer.commitAndGet();
while (records.hasNext()) {
final Product2<K, V> record = records.next();
final K key = record._1();
partitionWriters[partitioner.getPartition(key)].write(key, record._2());
}
}

File output = shuffleBlockResolver.getDataFile(shuffleId, mapId);
File tmp = Utils.tempFileWith(output);
try {
partitionLengths = writePartitionedFile(tmp);
shuffleBlockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, tmp);
} finally {
if (tmp.exists() && !tmp.delete()) {
logger.error("Error while deleting temp file {}", tmp.getAbsolutePath());
for (int i = 0; i < numPartitions; i++) {
try (DiskBlockObjectWriter writer = partitionWriters[i]) {
partitionWriterSegments[i] = writer.commitAndGet();
}
}

partitionLengths = writePartitionedData(mapOutputWriter);
mapOutputWriter.commitAllPartitions();
mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
} catch (Exception e) {
try {
mapOutputWriter.abort(e);
} catch (Exception e2) {
logger.error("Failed to abort the writer after failing to write map output.", e2);
}
throw e;
}
mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
}

@VisibleForTesting
@@ -179,37 +187,38 @@ long[] getPartitionLengths() {
*
* @return array of lengths, in bytes, of each partition of the file (used by map output tracker).
*/
private long[] writePartitionedFile(File outputFile) throws IOException {
private long[] writePartitionedData(ShuffleMapOutputWriter mapOutputWriter) throws IOException {
// Track location of the partition starts in the output file
final long[] lengths = new long[numPartitions];
if (partitionWriters == null) {
// We were passed an empty iterator
return lengths;
}

final FileOutputStream out = new FileOutputStream(outputFile, true);
final long writeStartTime = System.nanoTime();
boolean threwException = true;
try {
for (int i = 0; i < numPartitions; i++) {
final File file = partitionWriterSegments[i].file();
if (file.exists()) {
final FileInputStream in = new FileInputStream(file);
boolean copyThrewException = true;
try {
lengths[i] = Utils.copyStream(in, out, false, transferToEnabled);
copyThrewException = false;
} finally {
Closeables.close(in, copyThrewException);
}
if (!file.delete()) {
logger.error("Unable to delete file for partition {}", i);
boolean copyThrewException = true;
// TODO: Enable transferTo
ShufflePartitionWriter writer = mapOutputWriter.getNextPartitionWriter();
try (OutputStream tempOutputStream = writer.openStream()) {
if (file.exists()) {
FileInputStream in = new FileInputStream(file);
try {
Utils.copyStream(in, tempOutputStream, false, false);

yifeih (Mar 20, 2019): So using Utils.copyStream actually assumes that the OutputStream is a FileOutputStream in order to take advantage of the transferTo logic inside that function. The function also does its own buffering with an array of a fixed size. I wonder if we should just have the DefaultShuffleMapOutputWriter deal with generic FileOutputStreams and wrap them with buffered output writers if necessary? It would make the DefaultShuffleMapOutputWriter much cleaner too, so we're not keeping track of so many layers of output streams.

Reply: Actually, on second thought, you probably just want to fork the code paths here into a "transferTo" block and an "output streams" block to allow more flexibility in the plugins to return whatever output streams they want.
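As a rough illustration of the fork suggested above, the copy loop could branch on transferTo support using the openChannel()/openStream() methods that ShufflePartitionWriter already exposes in this PR. The helper below is a sketch only, assuming the surrounding writer's transferToEnabled flag; the method name, buffer size, and the decision to leave the channel/stream lifecycle to the plugin are assumptions, not part of the change.

// Sketch: copy one spilled partition file into the plugin-provided writer,
// preferring zero-copy transferTo when enabled.
private long copyPartitionData(File file, ShufflePartitionWriter writer) throws IOException {
  try (FileInputStream in = new FileInputStream(file)) {
    if (transferToEnabled) {
      // transferTo path: move bytes channel-to-channel without an intermediate buffer.
      FileChannel inChannel = in.getChannel();
      WritableByteChannel outChannel = writer.openChannel();
      long size = inChannel.size();
      long copied = 0L;
      while (copied < size) {
        copied += inChannel.transferTo(copied, size - copied, outChannel);
      }
      return copied;
    } else {
      // Stream path: buffered byte copy through whatever OutputStream the plugin returns.
      OutputStream out = writer.openStream();
      byte[] buffer = new byte[8192];
      long copied = 0L;
      int read;
      while ((read = in.read(buffer)) != -1) {
        out.write(buffer, 0, read);
        copied += read;
      }
      out.flush();
      return copied;
    }
  }
}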

copyThrewException = false;
} finally {
Closeables.close(in, copyThrewException);
}
}
}
lengths[i] = writer.getLength();
if (file.exists() && !file.delete()) {
logger.error("Unable to delete file for partition {}", i);
}
}
threwException = false;
} finally {
Closeables.close(out, threwException);
writeMetrics.incWriteTime(System.nanoTime() - writeStartTime);
}
partitionWriters = null;
@@ -20,7 +20,5 @@
import java.io.OutputStream;

public abstract class DefaultShuffleBlockOutputStream extends OutputStream {

Review comment: Why can't it just be a class with the implementation contents instead of an abstract class? Will there ever need to be other implementations of this?


public abstract int getCount();

public abstract int getCount();
}
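For reference, a minimal sketch of the non-abstract alternative raised in the comment above: a single concrete counting stream that wraps the underlying file stream, with no separate Impl subclass. The class name and the choice not to close the delegate are assumptions for illustration, not code from this PR.

import java.io.IOException;
import java.io.OutputStream;

// Sketch: concrete counting OutputStream instead of an abstract
// DefaultShuffleBlockOutputStream plus a private *Impl subclass.
class CountingShuffleBlockOutputStream extends OutputStream {
  private final OutputStream delegate;
  private int count = 0;
  private boolean closed = false;

  CountingShuffleBlockOutputStream(OutputStream delegate) {
    this.delegate = delegate;
  }

  // Number of bytes written through this stream so far.
  int getCount() {
    return count;
  }

  @Override
  public void write(int b) throws IOException {
    if (closed) {
      throw new IllegalStateException("Attempting to write to a closed block output stream.");
    }
    delegate.write(b);
    count++;
  }

  @Override
  public void close() {
    // Mark closed but leave the delegate open: in this sketch the map output
    // writer owns the underlying file stream across all partitions.
    closed = true;
  }
}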
@@ -54,4 +54,4 @@ public ShuffleWriteSupport writes() {
return new DefaultShuffleWriteSupport(
sparkConf, blockResolver, metrics.shuffleWriteMetrics());
}
}
}
@@ -17,10 +17,10 @@

package org.apache.spark.shuffle.sort.io;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.*;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -40,13 +40,12 @@ public class DefaultShuffleMapOutputWriter implements ShuffleMapOutputWriter {

private final int shuffleId;
private final int mapId;
private final int numPartitions;
private final ShuffleWriteMetricsReporter metrics;
private final IndexShuffleBlockResolver blockResolver;
private final long[] partitionLengths;
private ShufflePartitionWriter[] partitionWriters;
private final int bufferSize;
private int currPartitionId = 0;
private boolean successfulWrite = false;

private final File outputFile;
private final File outputTempFile;
@@ -63,45 +62,38 @@ public DefaultShuffleMapOutputWriter(
SparkConf sparkConf) {
this.shuffleId = shuffleId;
this.mapId = mapId;
this.numPartitions = numPartitions;
this.metrics = metrics;
this.blockResolver = blockResolver;
this.bufferSize =
(int) (long) sparkConf.get(
package$.MODULE$.SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE()) * 1024;
this.partitionLengths = new long[numPartitions];
this.partitionWriters = new ShufflePartitionWriter[numPartitions];
this.outputFile = blockResolver.getDataFile(shuffleId, mapId);
this.outputTempFile = Utils.tempFileWith(outputFile);
}

@Override
public ShufflePartitionWriter getNextPartitionWriter() throws IOException {
initStream();
ShufflePartitionWriter shufflePartitionWriter =
new DefaultShufflePartitionWriter(
new DefaultShuffleBlockOutputStreamImpl());
partitionWriters[currPartitionId++] = shufflePartitionWriter;
return shufflePartitionWriter;
return new DefaultShufflePartitionWriter(currPartitionId++);
}

@Override
public void commitAllPartitions() throws IOException {
for (int pId = 0; pId < numPartitions; pId ++) {
partitionLengths[pId] = partitionWriters[pId].getLength();
}
cleanUp();
blockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, outputTempFile);
// TODO: Maybe monitor a little more intelligently than checking files?
if (!outputFile.getParentFile().isDirectory() && !outputFile.getParentFile().mkdirs()) {
throw new IOException(
String.format(
"Failed to create shuffle file directory at %s.",
outputFile.getParentFile().getAbsolutePath()));
}
if (!outputFile.isFile() && !outputFile.createNewFile()) {
throw new IOException(
String.format(
"Failed to create empty shuffle file at %s.", outputFile.getAbsolutePath()));
if (!successfulWrite) {

Review comment: There are a number of ways this can be done, right? For instance, can't we check that the file doesn't exist, and create it? That might be easier to read than using a boolean flag.

Author reply: That happens later? It's easier to ensure that some form of write happened by either the byte stream or byte channel as an initial filter, I would think.

Reply: The action of committing would create the file if any bytes were written. But if no bytes are written, there would be no output file, right? Would be good to verify these behaviors by running BypassMergeSortShuffleWriterSuite locally and seeing what happens.

if (!outputFile.getParentFile().isDirectory() && !outputFile.getParentFile().mkdirs()) {
throw new IOException(
String.format(
"Failed to create shuffle file directory at %s.",
outputFile.getParentFile().getAbsolutePath()));
}
if (!outputFile.isFile() && !outputFile.createNewFile()) {

Review comment: Wait, I'm confused by this block. You're checking whether the output file is there, and then trying to create it? Shouldn't you be throwing if it's not there, regardless of whether you can create it or not, because when blockResolver.writeIndexFileAndCommit() is done the file should be there? But let me know if I'm misunderstanding this.

Reply: If the committer didn't create the file, we want to try to create an empty one by default, I think. But it's strange to me that shuffles can write empty files. Was this the case before our plugin refactor?

Reply: Oh, I see... well, looking at this code, I don't think it actually writes an empty file if there are no records: https://github.com/bloomberg/apache-spark-on-k8s/pull/6/files#diff-8b6b7a5dadc0d8e97307d0f8e8378d8fL126

throw new IOException(
String.format(
"Failed to create empty shuffle file at %s.", outputFile.getAbsolutePath()));
}
}
}
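Pulling the two threads above together, one possible reading is a commitAllPartitions that keys off the data file's existence after the index commit rather than a successfulWrite flag: if the commit produced no data file because no bytes were written, create an empty one. This is a hedged sketch of that suggestion against the fields already in this class, not the code that was merged.

@Override
public void commitAllPartitions() throws IOException {
  cleanUp();
  blockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, outputTempFile);
  // If nothing was written, the commit may not have produced a data file;
  // fall back to creating an empty one so downstream readers find a file.
  if (!outputFile.isFile()) {
    if (!outputFile.getParentFile().isDirectory() && !outputFile.getParentFile().mkdirs()) {
      throw new IOException(
          "Failed to create shuffle file directory at "
              + outputFile.getParentFile().getAbsolutePath());
    }
    if (!outputFile.createNewFile()) {
      throw new IOException(
          "Failed to create empty shuffle file at " + outputFile.getAbsolutePath());
    }
  }
}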

@@ -138,10 +130,42 @@ private void initStream() throws IOException {
}
}

private class DefaultShuffleBlockOutputStreamImpl extends DefaultShuffleBlockOutputStream {
private class DefaultShufflePartitionWriter implements ShufflePartitionWriter {

private boolean isClosed = false;
private final int partitionId;
private final DefaultShuffleBlockOutputStream stream;

private DefaultShufflePartitionWriter(int partitionId) {
this.partitionId = partitionId;
this.stream = new DefaultShuffleBlockOutputStreamImpl();
}

@Override
public OutputStream openStream() throws IOException {
return stream;
}

@Override
public long getLength() {
try {
stream.close();
} catch (Exception e) {
log.error("Error with closing stream for partition", e);
}
int length = stream.getCount();
partitionLengths[partitionId] = length;
return length;
}

@Override
public WritableByteChannel openChannel() throws IOException {
return Channels.newChannel(outputFileStream);

Review comment: This is strange - we create the newChannel around the outputFileStream, but openStream returns the DefaultShuffleBlockOutputStream. This can lead to an inconsistency in the count returned by this writer, because if we return the FileChannel here, the counter from the DefaultShuffleBlockOutputStream will not be reflected.

I think most of this can be caught by running BypassMergeSortShuffleWriterSuite locally.

}
}
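One way to address the count inconsistency flagged above would be to return a channel that does its own byte accounting instead of wrapping outputFileStream directly; DefaultShufflePartitionWriter could then read the length from whichever of the stream or the channel was actually used. The wrapper below is a hypothetical sketch, not part of this PR.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;

// Sketch: a counting channel so that bytes written via openChannel() are
// tracked the same way DefaultShuffleBlockOutputStream tracks openStream() bytes.
class CountingWritableByteChannel implements WritableByteChannel {
  private final WritableByteChannel delegate;
  private long count = 0L;

  CountingWritableByteChannel(WritableByteChannel delegate) {
    this.delegate = delegate;
  }

  long getCount() {
    return count;
  }

  @Override
  public int write(ByteBuffer src) throws IOException {
    int written = delegate.write(src);
    count += written;
    return written;
  }

  @Override
  public boolean isOpen() {
    return delegate.isOpen();
  }

  @Override
  public void close() {
    // Leave the underlying channel open; the map output writer owns the file
    // stream across partitions in this sketch.
  }
}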

private class DefaultShuffleBlockOutputStreamImpl extends DefaultShuffleBlockOutputStream {
private int count = 0;
private boolean isClosed = false;

@Override
public int getCount() {
Expand All @@ -153,6 +177,7 @@ public void write(int b) throws IOException {
if (isClosed) {
throw new IllegalStateException("Attempting to write to a closed block byte channel.");
}
successfulWrite = true;
outputBufferedFileStream.write(b);
count++;
}