diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 3ce6f29c6084..f086038d0f6c 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2215,13 +2215,13 @@ package object config { ConfigBuilder("spark.speculation.multiplier") .version("0.6.0") .doubleConf - .createWithDefault(1.5) + .createWithDefault(3) private[spark] val SPECULATION_QUANTILE = ConfigBuilder("spark.speculation.quantile") .version("0.6.0") .doubleConf - .createWithDefault(0.75) + .createWithDefault(0.9) private[spark] val SPECULATION_MIN_THRESHOLD = ConfigBuilder("spark.speculation.minTaskRuntime") diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index c55de278a6d2..ab2c00e36846 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -2013,6 +2013,7 @@ class TaskSetManagerSuite val conf = new SparkConf() conf.set(config.SPECULATION_ENABLED, true) conf.set(config.SPECULATION_QUANTILE.key, speculationQuantile.toString) + conf.set(config.SPECULATION_MULTIPLIER.key, "1.5") // Set the number of slots per executor conf.set(config.EXECUTOR_CORES.key, numExecutorCores.toString) conf.set(config.CPUS_PER_TASK.key, numCoresPerTask.toString) @@ -2414,6 +2415,7 @@ class TaskSetManagerSuite // minTimeToSpeculation parameter to checkSpeculatableTasks val conf = new SparkConf() .set(config.SPECULATION_MULTIPLIER, 0.0) + .set(config.SPECULATION_QUANTILE, 0.75) .set(config.SPECULATION_ENABLED, true) sc = new SparkContext("local", "test", conf) val ser = sc.env.closureSerializer.newInstance() diff --git a/docs/configuration.md b/docs/configuration.md index d3e9f27c4761..d5e2a569fdea 100644 --- 
a/docs/configuration.md +++ b/docs/configuration.md @@ -2907,7 +2907,7 @@ Apart from these, the following properties are also available, and may be useful spark.speculation.multiplier - 1.5 + 3 How many times slower a task is than the median to be considered for speculation. @@ -2915,7 +2915,7 @@ Apart from these, the following properties are also available, and may be useful spark.speculation.quantile - 0.75 + 0.9 Fraction of tasks which must be complete before speculation is enabled for a particular stage. diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 8baab5ec082b..5f3560883e59 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -44,6 +44,8 @@ license: | - Since Spark 4.0, the default log4j output has shifted from plain text to JSON lines to enhance analyzability. To revert to plain text output, you can either set `spark.log.structuredLogging.enabled` to `false`, or use a custom log4j configuration. +- Since Spark 4.0, Spark performs speculative executions less aggressively with `spark.speculation.multiplier=3` and `spark.speculation.quantile=0.9`. To restore the legacy behavior, you can set `spark.speculation.multiplier=1.5` and `spark.speculation.quantile=0.75`. + ## Upgrading from Core 3.4 to 3.5 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.