Add migration guide and fix two test failures
dongjoon-hyun committed Apr 3, 2024
commit 1e87333a533870412658bdcf4449115c7562151b
@@ -2013,6 +2013,7 @@ class TaskSetManagerSuite
val conf = new SparkConf()
conf.set(config.SPECULATION_ENABLED, true)
conf.set(config.SPECULATION_QUANTILE.key, speculationQuantile.toString)
conf.set(config.SPECULATION_MULTIPLIER.key, "1.5")
// Set the number of slots per executor
conf.set(config.EXECUTOR_CORES.key, numExecutorCores.toString)
conf.set(config.CPUS_PER_TASK.key, numCoresPerTask.toString)
@@ -2414,6 +2415,7 @@ class TaskSetManagerSuite
// minTimeToSpeculation parameter to checkSpeculatableTasks
val conf = new SparkConf()
.set(config.SPECULATION_MULTIPLIER, 0.0)
.set(config.SPECULATION_QUANTILE, 0.75)
.set(config.SPECULATION_ENABLED, true)
sc = new SparkContext("local", "test", conf)
val ser = sc.env.closureSerializer.newInstance()
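For context (my reading, not stated in the diff): with the Spark 4.0 defaults of `spark.speculation.quantile=0.9` and `spark.speculation.multiplier=3`, speculation starts later and triggers less often, so tests written against the old 0.75 / 1.5 thresholds pin those values explicitly to keep observing speculative launches. A simplified, hypothetical sketch of the threshold check (not Spark's actual implementation):

```scala
// Hypothetical, simplified sketch of the speculation check (not Spark's actual code):
// a running task is a speculation candidate only once the configured quantile of tasks
// has succeeded and its runtime exceeds multiplier * the median successful-task runtime.
object SpeculationSketch {
  def isSpeculatable(
      runtimeMs: Long,
      medianSuccessfulMs: Long,
      succeededFraction: Double,
      quantile: Double,   // spark.speculation.quantile
      multiplier: Double  // spark.speculation.multiplier
  ): Boolean =
    succeededFraction >= quantile && runtimeMs > multiplier * medianSuccessfulMs

  def main(args: Array[String]): Unit = {
    // Same task: speculatable under the legacy thresholds, not under the 4.0 defaults.
    println(isSpeculatable(2000, 1000, succeededFraction = 0.8, quantile = 0.75, multiplier = 1.5)) // true
    println(isSpeculatable(2000, 1000, succeededFraction = 0.8, quantile = 0.9, multiplier = 3.0))  // false
  }
}
```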
docs/core-migration-guide.md (2 additions, 0 deletions)
@@ -44,6 +44,8 @@ license: |

- Since Spark 4.0, the default log4j output has shifted from plain text to JSON lines to enhance analyzability. To revert to plain text output, you can either set `spark.log.structuredLogging.enabled` to `false`, or use a custom log4j configuration.

- Since Spark 4.0, Spark performs speculative executions less aggressively with `spark.speculation.multiplier=3` and `spark.speculation.quantile=0.9`. To restore the legacy behavior, you can set `spark.speculation.multiplier=1.5` and `spark.speculation.quantile=0.75`.

## Upgrading from Core 3.4 to 3.5

- Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.
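A minimal usage sketch (not part of this PR; the master and app name are placeholders) of restoring the pre-4.0 behavior described in the new migration-guide entry by setting the legacy values explicitly:

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Pin the legacy speculation settings that Spark 4.0 changed; before 4.0,
// spark.speculation.multiplier defaulted to 1.5 and spark.speculation.quantile to 0.75.
val conf = new SparkConf()
  .setMaster("local[*]")                     // placeholder master
  .setAppName("legacy-speculation-example")  // placeholder app name
  .set("spark.speculation", "true")
  .set("spark.speculation.multiplier", "1.5")
  .set("spark.speculation.quantile", "0.75")

val sc = new SparkContext(conf)
```

The same values can also be passed as `--conf` options to `spark-submit` or placed in `spark-defaults.conf`.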