1818package org .apache .spark .scheduler
1919
2020import java .util .Properties
21- import java .util .concurrent .Executors
2221
2322import scala .annotation .meta .param
2423import scala .collection .mutable .{ArrayBuffer , HashMap , HashSet , Map }
25- import scala .concurrent .{ExecutionContext , Future }
26- import scala .concurrent .duration .DurationConversions
2724import scala .language .reflectiveCalls
2825import scala .util .control .NonFatal
2926
@@ -37,7 +34,7 @@ import org.apache.spark.scheduler.SchedulingMode.SchedulingMode
3734import org .apache .spark .shuffle .FetchFailedException
3835import org .apache .spark .shuffle .MetadataFetchFailedException
3936import org .apache .spark .storage .{BlockId , BlockManagerId , BlockManagerMaster }
40- import org .apache .spark .util ._
37+ import org .apache .spark .util .{ AccumulatorContext , AccumulatorV2 , CallSite , LongAccumulator , Utils }
4138
4239class DAGSchedulerEventProcessLoopTester (dagScheduler : DAGScheduler )
4340 extends DAGSchedulerEventProcessLoop (dagScheduler) {
@@ -2110,12 +2107,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
21102107 }
21112108
21122109 test(" The failed stage never resubmitted due to abort stage in another thread" ) {
2113- implicit val executorContext = ExecutionContext
2114- .fromExecutorService(Executors .newFixedThreadPool(5 ))
2115- val duration = 60 .seconds
2116-
2117- val f1 = Future {
2118- try {
2110+ failAfter(60 .seconds) {
2111+ val e = intercept[SparkException ] {
21192112 val rdd1 = sc.makeRDD(Array (1 , 2 , 3 , 4 ), 2 ).map(x => (x, 1 )).groupByKey()
21202113 val shuffleHandle =
21212114 rdd1.dependencies.head.asInstanceOf [ShuffleDependency [_, _, _]].shuffleHandle
@@ -2125,14 +2118,14 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
21252118 BlockManagerId (" 1" , " 1" , 1 ), shuffleHandle.shuffleId, 0 , 0 , " test" )
21262119 case (x, _) => x
21272120 }.count()
2128- } catch {
2129- case e : Throwable =>
2130- logInfo(" expected abort stage1: " + e.getMessage)
21312121 }
2122+ assert(e.getMessage.contains(" org.apache.spark.shuffle.FetchFailedException" ))
21322123 }
2133- ThreadUtils .awaitResult(f1, duration)
2134- val f2 = Future {
2135- try {
2124+
2125+ // The following job, which fails due to a fetch failure, would hang without
2126+ // the fix for SPARK-17644.
2127+ failAfter(60 .seconds) {
2128+ val e = intercept[SparkException ] {
21362129 val rdd2 = sc.makeRDD(Array (1 , 2 , 3 , 4 ), 2 ).map(x => (x, 1 )).groupByKey()
21372130 val shuffleHandle =
21382131 rdd2.dependencies.head.asInstanceOf [ShuffleDependency [_, _, _]].shuffleHandle
@@ -2142,17 +2135,9 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
21422135 BlockManagerId (" 1" , " 1" , 1 ), shuffleHandle.shuffleId, 0 , 0 , " test" )
21432136 case (x, _) => x
21442137 }.count()
2145- } catch {
2146- case e : Throwable =>
2147- logInfo(" expected abort stage2: " + e.getMessage)
21482138 }
2139+ assert(e.getMessage.contains(" org.apache.spark.shuffle.FetchFailedException" ))
21492140 }
2150- try {
2151- ThreadUtils .awaitResult(f2, duration)
2152- } catch {
2153- case e : Throwable => fail(" The failed stage never resubmitted" )
2154- }
2155- executorContext.shutdown()
21562141 }
21572142
21582143 /**
0 commit comments