@@ -95,7 +95,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
}

// Sort the output if there is a sort ordering defined.
dep.keyOrdering match {
val resultIter = dep.keyOrdering match {
case Some(keyOrd: Ordering[K]) =>
// Create an ExternalSorter to sort the data.
val sorter =
@@ -104,9 +104,16 @@
context.taskMetrics().incMemoryBytesSpilled(sorter.memoryBytesSpilled)
context.taskMetrics().incDiskBytesSpilled(sorter.diskBytesSpilled)
context.taskMetrics().incPeakExecutionMemory(sorter.peakMemoryUsedBytes)
// Use completion callback to stop sorter if task was completed (either finished/cancelled).
Contributor Author: To fit the 100-character line limit, "or" is replaced by "/".

Contributor: Then we can just write "if task was finished/cancelled".

context.addTaskCompletionListener(_ => {
sorter.stop()
})
CompletionIterator[Product2[K, C], Iterator[Product2[K, C]]](sorter.iterator, sorter.stop())
case None =>
aggregatedIter
}
// Use another interruptible iterator here to support task cancellation, as the aggregator
// and/or sorter may have consumed the previous interruptible iterator.
new InterruptibleIterator[Product2[K, C]](context, resultIter)
Contributor: There is a chance that resultIter is already an InterruptibleIterator, and we should not double-wrap it. Can you send a follow-up PR to fix this? Then we can backport them to 2.3 together.

Contributor Author: Will do.

}
}
core/src/test/scala/org/apache/spark/JobCancellationSuite.scala (55 changes: 52 additions & 3 deletions)
@@ -18,15 +18,14 @@
package org.apache.spark

import java.util.concurrent.Semaphore
import java.util.concurrent.atomic.AtomicInteger

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.concurrent.duration._

import org.scalatest.BeforeAndAfter
import org.scalatest.Matchers

Contributor: This will break the style check.

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart}
import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.util.ThreadUtils

/**
@@ -320,6 +319,55 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAfter
f2.get()
}

test("Interruptible iterator of shuffle reader") {
Contributor: Can we briefly explain what happens in this test?
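In short: the test shuffles 1000 (i, i) pairs into two sorted partitions, blocks each shuffle-read task once it reaches a key >= 10, cancels the job while the tasks are blocked, and asserts that at most 11 elements were processed, i.e. that cancellation interrupts the shuffle reader's iterator instead of letting it drain all 1000 elements.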

import JobCancellationSuite._
val numSlice = 2
sc = new SparkContext(s"local[$numSlice]", "test")

val f = sc.parallelize(1 to 1000, numSlice).map { i => (i, i) }
.repartitionAndSortWithinPartitions(new HashPartitioner(2))
.mapPartitions { iter =>
taskStartedSemaphore.release()
Contributor: This will be called twice, as the root RDD has 2 partitions, so f.cancel might be called before both of these partitions have finished.

Contributor Author: f.cancel() should be called before these partitions (tasks) finish, and we want to make sure those tasks can be cancelled.

iter
}.foreachAsync { x =>
if ( x._1 >= 10) { // this block of code is partially executed.
Contributor: No space after if(.

taskCancelledSemaphore.acquire()
}
executionOfInterruptibleCounter.getAndIncrement()
Contributor: Do you mean that without your PR, the task will keep running and hit this line 1000 times after cancellation?

Contributor Author: Yes.
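That is the bug in a nutshell: the sorted output iterator no longer performs a per-element kill check. A minimal model of the check InterruptibleIterator adds (a simplified sketch for illustration, not Spark's actual implementation):

    import java.util.concurrent.atomic.AtomicBoolean

    // Checks a cancellation flag before yielding each element, so a cancelled
    // consumer stops draining upstream data instead of finishing all of it.
    class CancellableIterator[T](cancelled: AtomicBoolean, delegate: Iterator[T])
      extends Iterator[T] {
      override def hasNext: Boolean = {
        if (cancelled.get()) throw new RuntimeException("task killed")
        delegate.hasNext
      }
      override def next(): T = delegate.next()
    }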

}

val sem = new Semaphore(0)
val taskCompletedSem = new Semaphore(0)
Future {
taskStartedSemaphore.acquire()
f.cancel()
Contributor: What's the expectation for when this f.cancel() should be called?

Contributor Author: Line 372: sem.acquire() is blocked by this Future block, but it looks like we don't need the Future or sem here. I will update the code.
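A sketch of that simplification (hypothetical; the updated test is not shown here), cancelling directly on the test thread instead of inside a Future:

    taskStartedSemaphore.acquire() // wait until at least one task has started
    f.cancel()                     // cancel the async job directly
    // ... then assert on the failure as below; no Future or sem needed.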

sem.release()
}

sc.addSparkListener(new SparkListener {
override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
// release taskCancelledSemaphore when cancelTasks event has been posted
if (stageCompleted.stageInfo.stageId == 1) {
taskCancelledSemaphore.release(1000)
}
}

override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
if (taskEnd.stageId == 1) { // make sure task ends
taskCompletedSem.release()
}
}
})

sem.acquire()
val e = intercept[SparkException] { f.get() }.getCause
Contributor: nit: intercept[SparkException](f.get()).getCause

Contributor Author: Will do.

assert(e.getMessage.contains("cancelled") || e.getMessage.contains("killed"))

taskCompletedSem.acquire(numSlice)
// 11 = |1..10| + |501| (one more element from the other partition)
assert(executionOfInterruptibleCounter.get() <= 11)
Contributor: Why is it 11, not 10?

Contributor: For simplicity, can we just test 1 partition/task?

Contributor Author: Of course we can. I chose 2 because a Spark job normally has multiple partitions. It's your call whether to go with a single partition or stick with this one.

Contributor: For a unit test, I'd like to have the simplest test that can expose the bug.
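A single-partition variant along those lines might look like this (hypothetical sketch; with all keys in one sorted partition, elements 1..9 pass freely, element 10 blocks, and the counter bound tightens to roughly 10):

    sc = new SparkContext("local[1]", "test")
    val f = sc.parallelize(1 to 1000, 1).map(i => (i, i))
      .repartitionAndSortWithinPartitions(new HashPartitioner(1))
      .mapPartitions { iter => taskStartedSemaphore.release(); iter }
      .foreachAsync { x =>
        if (x._1 >= 10) taskCancelledSemaphore.acquire() // block mid-consumption
        executionOfInterruptibleCounter.getAndIncrement()
      }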

}

def testCount() {
// Cancel before launching any tasks
{
@@ -384,4 +432,5 @@ object JobCancellationSuite {
val taskStartedSemaphore = new Semaphore(0)
val taskCancelledSemaphore = new Semaphore(0)
val twoJobsSharingStageSemaphore = new Semaphore(0)
val executionOfInterruptibleCounter = new AtomicInteger(0)
}