apache · zhouyifan279 · Aug 9, 2023 · Aug 14, 2023 · Aug 15, 2023 · Aug 15, 2023
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
@@ -59,17 +59,17 @@ jobs:
           - java: 8
             spark: '3.3'
             spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.1.3 -Dspark.archive.name=spark-3.1.3-bin-hadoop3.2.tgz -Pzookeeper-3.6'
-            exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
+            exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
             comment: 'verify-on-spark-3.1-binary'
           - java: 8
             spark: '3.3'
             spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz -Pzookeeper-3.6'
-            exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
+            exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
             comment: 'verify-on-spark-3.2-binary'
           - java: 8
             spark: '3.3'
             spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.4.0 -Dspark.archive.name=spark-3.4.0-bin-hadoop3.tgz -Pzookeeper-3.6'
-            exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
+            exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
             comment: 'verify-on-spark-3.4-binary'
         exclude:
           # SPARK-33772: Spark supports JDK 17 since 3.3.0

diff --git a/extensions/spark/kyuubi-extension-spark-3-3/pom.xml b/extensions/spark/kyuubi-extension-spark-3-3/pom.xml
@@ -37,6 +37,14 @@
             <version>${project.version}</version>
         </dependency>
 
+        <dependency>
+            <groupId>org.apache.kyuubi</groupId>
+            <artifactId>kyuubi-download</artifactId>
+            <version>${project.version}</version>
+            <type>pom</type>
+            <scope>test</scope>
+        </dependency>
+
         <dependency>
             <groupId>org.apache.kyuubi</groupId>
             <artifactId>kyuubi-extension-spark-common_${scala.binary.version}</artifactId>
@@ -45,6 +53,14 @@
             <scope>test</scope>
         </dependency>
 
+        <dependency>
+            <groupId>org.apache.kyuubi</groupId>
+            <artifactId>kyuubi-util-scala_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+
         <dependency>
             <groupId>org.scala-lang</groupId>
             <artifactId>scala-library</artifactId>
@@ -130,6 +146,38 @@
     <build>
 
         <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>regex-property</id>
+                        <goals>
+                            <goal>regex-property</goal>
+                        </goals>
+                        <configuration>
+                            <name>spark.home</name>
+                            <value>${project.basedir}/../../../externals/kyuubi-download/target/${spark.archive.name}</value>
+                            <regex>(.+)\.tgz</regex>
+                            <replacement>$1</replacement>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.scalatest</groupId>
+                <artifactId>scalatest-maven-plugin</artifactId>
+                <configuration>
+                    <environmentVariables>
+                        <!--
+                          Some tests run Spark in local-cluster mode.
+                          This mode uses SPARK_HOME and SPARK_SCALA_VERSION to build the command that launches a Spark Standalone Cluster.
+                          -->
+                        <SPARK_HOME>${spark.home}</SPARK_HOME>
+                        <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
+                    </environmentVariables>
+                </configuration>
+            </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-shade-plugin</artifactId>

diff --git a/...i-extension-spark-3-3/src/main/scala/org/apache/spark/sql/FinalStageResourceManager.scala b/...i-extension-spark-3-3/src/main/scala/org/apache/spark/sql/FinalStageResourceManager.scala
@@ -22,6 +22,7 @@ import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.{ExecutorAllocationClient, MapOutputTrackerMaster, SparkContext, SparkEnv}
+import org.apache.spark.resource.ResourceProfile
 import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SortExec, SparkPlan}
@@ -185,7 +186,12 @@ case class FinalStageResourceManager(session: SparkSession)
       numReduce: Int): Unit = {
     val executorAllocationClient = sc.schedulerBackend.asInstanceOf[ExecutorAllocationClient]
 
-    val executorsToKill = findExecutorToKill(sc, targetExecutors, shuffleId, numReduce)
+    val executorsToKill =
+      if (conf.getConf(KyuubiSQLConf.FINAL_WRITE_STAGE_EAGERLY_KILL_EXECUTORS_KILL_ALL)) {
+        executorAllocationClient.getExecutorIds()
+      } else {
+        findExecutorToKill(sc, targetExecutors, shuffleId, numReduce)
+      }
     logInfo(s"Request to kill executors, total count ${executorsToKill.size}, " +
       s"[${executorsToKill.mkString(", ")}].")
     if (executorsToKill.isEmpty) {
@@ -210,6 +216,38 @@ case class FinalStageResourceManager(session: SparkSession)
       adjustTargetNumExecutors = true,
       countFailures = false,
       force = false)
+
+    getAdjustedTargetExecutors(sc, executorAllocationClient)
+      .filter(_ < targetExecutors).foreach { adjustedExecutors =>
+        val delta = targetExecutors - adjustedExecutors
+        logInfo(s"Target executors after kill ($adjustedExecutors) is lower than required " +
+          s"($targetExecutors). Requesting $delta additional executor(s).")
+        executorAllocationClient.requestExecutors(delta)
+      }
+  }
+
+  private def getAdjustedTargetExecutors(
+      sc: SparkContext,
+      executorAllocationClient: ExecutorAllocationClient): Option[Int] = {
+    executorAllocationClient match {
+      case schedulerBackend: CoarseGrainedSchedulerBackend => // state is read via reflection below
+        try {
+          val field = classOf[CoarseGrainedSchedulerBackend]
+            .getDeclaredField("requestedTotalExecutorsPerResourceProfile")
+          field.setAccessible(true) // NOTE(review): presumably non-public in Spark; confirm
+          schedulerBackend.synchronized { // read the map while holding the backend's monitor
+            val requestedTotalExecutorsPerResourceProfile =
+              field.get(schedulerBackend).asInstanceOf[mutable.HashMap[ResourceProfile, Int]]
+            val defaultRp = sc.resourceProfileManager.defaultResourceProfile
+            requestedTotalExecutorsPerResourceProfile.get(defaultRp) // None when no entry exists
+          }
+        } catch {
+          case e: Exception => // best effort: log and fall back to None
+            logWarning("Failed to get requestedTotalExecutors of Default ResourceProfile", e)
+            None
+        }
+      case _ => None // any other backend type: value is not available
+    }
   }
 
   @transient private val queryStageOptimizerRules: Seq[Rule[SparkPlan]] = Seq(

diff --git a/...ension-spark-3-3/src/test/scala/org/apache/spark/sql/FinalStageResourceManagerSuite.scala b/...ension-spark-3-3/src/test/scala/org/apache/spark/sql/FinalStageResourceManagerSuite.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.SparkConf
+import org.scalatest.time.{Minutes, Span}
+
+import org.apache.kyuubi.sql.KyuubiSQLConf
+import org.apache.kyuubi.tags.SparkLocalClusterTest
+
+@SparkLocalClusterTest
+class FinalStageResourceManagerSuite extends KyuubiSparkSQLExtensionTest {
+
+  override def sparkConf(): SparkConf = {
+    // It is difficult to run spark in local-cluster mode when spark.testing is set.
+    sys.props.remove("spark.testing")
+
+    super.sparkConf().set("spark.master", "local-cluster[3, 1, 1024]")
+      .set("spark.dynamicAllocation.enabled", "true")
+      .set("spark.dynamicAllocation.initialExecutors", "3")
+      .set("spark.dynamicAllocation.minExecutors", "1")
+      .set("spark.dynamicAllocation.shuffleTracking.enabled", "true")
+      .set(KyuubiSQLConf.FINAL_STAGE_CONFIG_ISOLATION.key, "true")
+      .set(KyuubiSQLConf.FINAL_WRITE_STAGE_EAGERLY_KILL_EXECUTORS_ENABLED.key, "true")
+  }
+
+  test("[KYUUBI #5136][Bug] Final Stage hangs forever") {
+    // Prerequisites to reproduce the bug:
+    // 1. Dynamic allocation is enabled.
+    // 2. Dynamic allocation min executors is 1.
+    // 3. target executors < active executors.
+    // 4. No active executor is left after FinalStageResourceManager killed executors.
+    //    This is possible because executors retained by FinalStageResourceManager may already
+    //    have been requested to be killed but have not died yet.
+    // 5. Final Stage required executors is 1.
+    withSQLConf(
+      (KyuubiSQLConf.FINAL_WRITE_STAGE_EAGERLY_KILL_EXECUTORS_KILL_ALL.key, "true")) {
+      withTable("final_stage") {
+        eventually(timeout(Span(10, Minutes))) { // retry until it succeeds or 10 minutes pass
+          sql(
+            "CREATE TABLE final_stage AS SELECT id, count(*) as num FROM (SELECT 0 id) GROUP BY id")
+        }
+      }
+    }
+  }
+}
diff --git a/extensions/spark/kyuubi-extension-spark-3-4/pom.xml b/extensions/spark/kyuubi-extension-spark-3-4/pom.xml
@@ -55,6 +55,22 @@
             <scope>provided</scope>
         </dependency>
 
+        <dependency>
+            <groupId>org.apache.kyuubi</groupId>
+            <artifactId>kyuubi-download</artifactId>
+            <version>${project.version}</version>
+            <type>pom</type>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.kyuubi</groupId>
+            <artifactId>kyuubi-util-scala_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -111,11 +127,49 @@
             <artifactId>jakarta.xml.bind-api</artifactId>
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>
 
         <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>regex-property</id>
+                        <goals>
+                            <goal>regex-property</goal>
+                        </goals>
+                        <configuration>
+                            <name>spark.home</name>
+                            <value>${project.basedir}/../../../externals/kyuubi-download/target/${spark.archive.name}</value>
+                            <regex>(.+)\.tgz</regex>
+                            <replacement>$1</replacement>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.scalatest</groupId>
+                <artifactId>scalatest-maven-plugin</artifactId>
+                <configuration>
+                    <environmentVariables>
+                        <!--
+                        Some tests run Spark in local-cluster mode.
+                        This mode uses SPARK_HOME and SPARK_SCALA_VERSION to build the command that launches a Spark Standalone Cluster.
+                        -->
+                        <SPARK_HOME>${spark.home}</SPARK_HOME>
+                        <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
+                    </environmentVariables>
+                </configuration>
+            </plugin>
             <plugin>
                 <groupId>org.antlr</groupId>
                 <artifactId>antlr4-maven-plugin</artifactId>

diff --git a/...spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala b/...spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala
@@ -210,6 +210,14 @@ object KyuubiSQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val FINAL_WRITE_STAGE_EAGERLY_KILL_EXECUTORS_KILL_ALL =
+    buildConf("spark.sql.finalWriteStage.eagerlyKillExecutors.killAll")
+      .doc("When true, eagerly kill all executors before running final write stage. " +
+        "Mainly for test.")
+      .version("1.8.0")
+      .booleanConf
+      .createWithDefault(false) // disabled unless explicitly set to true
+
   val FINAL_WRITE_STAGE_SKIP_KILLING_EXECUTORS_FOR_TABLE_CACHE =
     buildConf("spark.sql.finalWriteStage.skipKillingExecutorsForTableCache")
       .doc("When true, skip killing executors if the plan has table caches.")

diff --git a/...i-extension-spark-3-4/src/main/scala/org/apache/spark/sql/FinalStageResourceManager.scala b/...i-extension-spark-3-4/src/main/scala/org/apache/spark/sql/FinalStageResourceManager.scala
@@ -22,6 +22,7 @@ import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.{ExecutorAllocationClient, MapOutputTrackerMaster, SparkContext, SparkEnv}
+import org.apache.spark.resource.ResourceProfile
 import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SortExec, SparkPlan}
@@ -188,7 +189,12 @@ case class FinalStageResourceManager(session: SparkSession)
       numReduce: Int): Unit = {
     val executorAllocationClient = sc.schedulerBackend.asInstanceOf[ExecutorAllocationClient]
 
-    val executorsToKill = findExecutorToKill(sc, targetExecutors, shuffleId, numReduce)
+    val executorsToKill =
+      if (conf.getConf(KyuubiSQLConf.FINAL_WRITE_STAGE_EAGERLY_KILL_EXECUTORS_KILL_ALL)) {
+        executorAllocationClient.getExecutorIds()
+      } else {
+        findExecutorToKill(sc, targetExecutors, shuffleId, numReduce)
+      }
     logInfo(s"Request to kill executors, total count ${executorsToKill.size}, " +
       s"[${executorsToKill.mkString(", ")}].")
     if (executorsToKill.isEmpty) {
@@ -213,6 +219,38 @@ case class FinalStageResourceManager(session: SparkSession)
       adjustTargetNumExecutors = true,
       countFailures = false,
       force = false)
+
+    getAdjustedTargetExecutors(sc, executorAllocationClient)
+      .filter(_ < targetExecutors).foreach { adjustedExecutors =>
+        val delta = targetExecutors - adjustedExecutors
+        logInfo(s"Target executors after kill ($adjustedExecutors) is lower than required " +
+          s"($targetExecutors). Requesting $delta additional executor(s).")
+        executorAllocationClient.requestExecutors(delta)
+      }
+  }
+
+  private def getAdjustedTargetExecutors(
+      sc: SparkContext,
+      executorAllocationClient: ExecutorAllocationClient): Option[Int] = {
+    executorAllocationClient match {
+      case schedulerBackend: CoarseGrainedSchedulerBackend => // state is read via reflection below
+        try {
+          val field = classOf[CoarseGrainedSchedulerBackend]
+            .getDeclaredField("requestedTotalExecutorsPerResourceProfile")
+          field.setAccessible(true) // NOTE(review): presumably non-public in Spark; confirm
+          schedulerBackend.synchronized { // read the map while holding the backend's monitor
+            val requestedTotalExecutorsPerResourceProfile =
+              field.get(schedulerBackend).asInstanceOf[mutable.HashMap[ResourceProfile, Int]]
+            val defaultRp = sc.resourceProfileManager.defaultResourceProfile
+            requestedTotalExecutorsPerResourceProfile.get(defaultRp) // None when no entry exists
+          }
+        } catch {
+          case e: Exception => // best effort: log and fall back to None
+            logWarning("Failed to get requestedTotalExecutors of Default ResourceProfile", e)
+            None
+        }
+      case _ => None // any other backend type: value is not available
+    }
   }
 
   @transient private val queryStageOptimizerRules: Seq[Rule[SparkPlan]] = Seq(