core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
@@ -95,7 +95,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
}

// Sort the output if there is a sort ordering defined.
-    dep.keyOrdering match {
+    val resultIter = dep.keyOrdering match {
case Some(keyOrd: Ordering[K]) =>
// Create an ExternalSorter to sort the data.
val sorter =
@@ -104,9 +104,18 @@
context.taskMetrics().incMemoryBytesSpilled(sorter.memoryBytesSpilled)
context.taskMetrics().incDiskBytesSpilled(sorter.diskBytesSpilled)
context.taskMetrics().incPeakExecutionMemory(sorter.peakMemoryUsedBytes)
// Use completion callback to stop sorter if task was cancelled.
Contributor: if task is completed (either finished or canceled)

context.addTaskCompletionListener(tc => {
// Note: we only stop sorter if cancelled as sorter.stop wouldn't be called in
// CompletionIterator. Another way would be making sorter.stop idempotent.
if (tc.isInterrupted()) { sorter.stop() }
Contributor: seems we can remove this `if` if we don't return a CompletionIterator.

BTW I think we need to check all the places that use CompletionIterator, to see if they consider job canceling.

Contributor (author): One advantage of CompletionIterator is that the completionFunction will be called as soon as the wrapped iterator is consumed. So for the sorter, it will release memory earlier rather than at task completion.

As for job cancelling, it's not just CompletionIterator that we should consider. The combiner-and-sorter pattern (or similar) is something we should look for:

```scala
combiner.insertAll(iterator) // or sorter.insertAll(iterator)
// then return a new iterator
combiner.iterator // or sorter.iterator
```
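For readers unfamiliar with the class, here is a minimal sketch of the CompletionIterator idea (a hypothetical stand-in, not Spark's actual implementation), showing why the completion function fires as soon as the wrapped iterator is drained:

```scala
// Sketch: fire a completion callback the moment the wrapped iterator is
// exhausted, so resources are released before the task itself finishes.
class OnCompletionIterator[A](sub: Iterator[A], completion: () => Unit) extends Iterator[A] {
  private var completed = false
  override def hasNext: Boolean = {
    val more = sub.hasNext
    if (!more && !completed) { completed = true; completion() }
    more
  }
  override def next(): A = sub.next()
}

object CompletionDemo extends App {
  // "sorter.stop()" prints immediately after the last element is consumed.
  val it = new OnCompletionIterator[Int](Iterator(1, 2, 3), () => println("sorter.stop()"))
  it.foreach(println)
}
```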

Contributor: I may be missing something obvious, but it seems ExternalSorter.stop() is already idempotent?

Contributor (author):

> I may be missing something obvious, but it seems ExternalSorter.stop() is already idempotent?

Ah, yes. After another look, it's indeed idempotent. Will update the code.
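Idempotency matters here because stop() can legitimately fire twice on the happy path: once from the CompletionIterator when the iterator is drained, and once from the task-completion listener. A minimal sketch of the guard (illustrative only, not ExternalSorter's actual code):

```scala
// Sketch: the second and later calls to stop() must be no-ops.
class IdempotentSorter {
  private var stopped = false
  def stop(): Unit = if (!stopped) {
    stopped = true
    println("releasing spill files and in-memory buffers") // stand-in for real cleanup
  }
}

object StopDemo extends App {
  val sorter = new IdempotentSorter
  sorter.stop() // e.g. from the CompletionIterator
  sorter.stop() // e.g. from the completion listener: no-op
}
```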

})
CompletionIterator[Product2[K, C], Iterator[Product2[K, C]]](sorter.iterator, sorter.stop())
case None =>
aggregatedIter
}
// Use another interruptible iterator here to support task cancellation as aggregator or(and)
// sorter may have consumed previous interruptible iterator.
new InterruptibleIterator[Product2[K, C]](context, resultIter)
Contributor: there is a chance that resultIter is already an InterruptibleIterator, and we should not double-wrap it. Can you send a follow-up PR to fix this? Then we can backport them to 2.3 together.

Contributor (author): Will do.
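Such a follow-up might pattern-match before wrapping. A sketch of the idea (it reuses `context` and `resultIter` from the diff above; not the actual follow-up patch):

```scala
// Sketch: only add another layer when resultIter is not already interruptible.
resultIter match {
  case _: InterruptibleIterator[Product2[K, C]] => resultIter
  case _ => new InterruptibleIterator[Product2[K, C]](context, resultIter)
}
```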

}
}
core/src/test/scala/org/apache/spark/JobCancellationSuite.scala (37 additions, 0 deletions)
@@ -18,6 +18,7 @@
package org.apache.spark

import java.util.concurrent.Semaphore
import java.util.concurrent.atomic.AtomicInteger

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
@@ -320,6 +321,41 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAfter
f2.get()
}

test("Interruptible iterator of shuffle reader") {
Contributor: can we briefly explain what happened in this test?

import JobCancellationSuite._
sc = new SparkContext("local[2]", "test")

val f = sc.parallelize(1 to 1000, 2).map { i => (i, i) }
.repartitionAndSortWithinPartitions(new HashPartitioner(2))
.mapPartitions { iter =>
taskStartedSemaphore.release()
Contributor: This will be called twice as the root RDD has 2 partitions, so f.cancel might be called before both of these 2 partitions have finished.

Contributor (author): f.cancel() should be called before these partitions (tasks) finish, and we want to make sure these tasks can be cancelled.

// Small delay to ensure that foreach is cancelled if task is killed
Thread.sleep(1000)
jerryshao (Contributor), Feb 8, 2018: I think using sleep will make the UT flaky; you'd better change it to some deterministic way.

Contributor: +1
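One deterministic alternative (a sketch only, not necessarily what this PR ended up doing) is to gate the reduce-side work on a semaphore that a SparkListener releases once the stage has completed, so no fixed sleep is needed:

```scala
import java.util.concurrent.Semaphore
import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted}

// Sketch: release the gate only after the reduce stage (id 1 here) completes,
// e.g. due to cancellation. Each element processed by the reduce task first
// blocks on gate.acquire(), so nothing is counted until cancellation happens.
val gate = new Semaphore(0)
sc.addSparkListener(new SparkListener {
  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
    if (stageCompleted.stageInfo.stageId == 1) gate.release(1000)
  }
})
```

Inside foreachAsync, `gate.acquire()` would then replace the Thread.sleep.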

iter
}.foreachAsync { _ =>
executionOfInterruptibleCounter.getAndIncrement()
Contributor: do you mean that without your PR, the task will keep running and hit this line 1000 times after canceling?

Contributor (author): Yes.

}

val sem = new Semaphore(0)
Future {
taskStartedSemaphore.acquire()
f.cancel()
Contributor: what's the expectation for when this f.cancel() should be called?

Contributor (author): Line 372's sem.acquire() is blocked by this Future block, but it looks like we don't need the Future or sem here. I will update the code.
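The simplification described might look like this (a sketch reworking the test's own lines, not the committed change):

```scala
// Sketch: no Future/sem indirection; cancel as soon as a reduce task has started.
taskStartedSemaphore.acquire() // wait for at least one task to start
f.cancel()                     // then cancel the async action directly

val e = intercept[SparkException](f.get()).getCause
assert(e.getMessage.contains("cancelled") || e.getMessage.contains("killed"))
```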

sem.release()
}

sem.acquire()

val e = intercept[SparkException] { f.get() }.getCause
Contributor: nit: `intercept[SparkException](f.get()).getCause`

Contributor (author): will do


assert(executionOfInterruptibleCounter.get() === 0)
assert(e.getMessage.contains("cancelled") || e.getMessage.contains("killed"))

// Small delay to ensure tasks are actually finished or killed
Thread.sleep(2000)
assert(executionOfInterruptibleCounter.get() === 0)

}

def testCount() {
// Cancel before launching any tasks
{
@@ -384,4 +420,5 @@ object JobCancellationSuite {
val taskStartedSemaphore = new Semaphore(0)
val taskCancelledSemaphore = new Semaphore(0)
val twoJobsSharingStageSemaphore = new Semaphore(0)
val executionOfInterruptibleCounter = new AtomicInteger(0)
}