[SPARK-18278] [Scheduler] Spark on Kubernetes - Basic Scheduler Backend #19468
Changes from 1 commit
ExecutorPodFactory.scala
@@ -47,7 +47,6 @@ private[spark] class ExecutorPodFactoryImpl(sparkConf: SparkConf)
  private val executorExtraClasspath = sparkConf.get(
    org.apache.spark.internal.config.EXECUTOR_CLASS_PATH)
  private val executorJarsDownloadDir = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION)

  private val executorLabels = ConfigurationUtils.parsePrefixedKeyValuePairs(
    sparkConf,

@@ -94,7 +93,7 @@ private[spark] class ExecutorPodFactoryImpl(sparkConf: SparkConf)
      MEMORY_OVERHEAD_MIN_MIB))
  private val executorMemoryWithOverhead = executorMemoryMiB + memoryOverheadMiB

  private val executorCores = sparkConf.getDouble("spark.executor.cores", 1d)
  private val executorCores = sparkConf.getDouble("spark.executor.cores", 1)
  private val executorLimitCores = sparkConf.getOption(KUBERNETES_EXECUTOR_LIMIT_CORES.key)

  override def createExecutorPod(

@@ -108,7 +107,7 @@ private[spark] class ExecutorPodFactoryImpl(sparkConf: SparkConf)
    // hostname must be no longer than 63 characters, so take the last 63 characters of the pod
    // name as the hostname. This preserves uniqueness since the end of name contains
    // executorId and applicationId
    // executorId
    val hostname = name.substring(Math.max(0, name.length - 63))
Contributor: What is hostname used for here?

Contributor: I think we might have been relying on it before, but we certainly don't now - this should be removed. We probably want to try it on our fork first and run it through our integration tests to verify that this is the case.

Contributor: I am fine with deferring this to a later PR as long as we track it somewhere: it might so happen that this is a requirement anyway and we can't use random names.
    val resolvedExecutorLabels = Map(
      SPARK_EXECUTOR_ID_LABEL -> executorId,

@@ -139,15 +138,14 @@ private[spark] class ExecutorPodFactoryImpl(sparkConf: SparkConf)
        new EnvVarBuilder().withName(s"$ENV_JAVA_OPT_PREFIX$index").withValue(opt).build()
      }
    }.getOrElse(Seq.empty[EnvVar])
Contributor: How is this getting used? I see it getting set, but not used anywhere.

Contributor: This is used in the executor Dockerfile included in #19717.

Contributor: Thanks, somehow it did not show up in my searches.
    val executorEnv = (Seq(
    val executorEnv = Seq(
      (ENV_EXECUTOR_PORT, executorPort.toString),
      (ENV_DRIVER_URL, driverUrl),
      // Executor backend expects integral value for executor cores, so round it up to an int.
      (ENV_EXECUTOR_CORES, math.ceil(executorCores).toInt.toString),
      (ENV_EXECUTOR_MEMORY, executorMemoryString),
      (ENV_APPLICATION_ID, applicationId),
      (ENV_EXECUTOR_ID, executorId),
      (ENV_MOUNTED_CLASSPATH, s"$executorJarsDownloadDir/*")) ++ executorEnvs)
      (ENV_EXECUTOR_ID, executorId))
      .map(env => new EnvVarBuilder()
        .withName(env._1)
        .withValue(env._2)
KubernetesClusterSchedulerBackend.scala
@@ -20,6 +20,7 @@ import java.io.Closeable
import java.net.InetAddress
import java.util.concurrent.{ConcurrentHashMap, ExecutorService, ScheduledExecutorService, TimeUnit}
import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference}
import javax.annotation.concurrent.GuardedBy

import io.fabric8.kubernetes.api.model._
import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher}

@@ -49,9 +50,11 @@ private[spark] class KubernetesClusterSchedulerBackend(
  private val EXECUTOR_ID_COUNTER = new AtomicLong(0L)
  private val RUNNING_EXECUTOR_PODS_LOCK = new Object
  // Indexed by executor IDs and guarded by RUNNING_EXECUTOR_PODS_LOCK.
  // Indexed by executor IDs
  @GuardedBy("RUNNING_EXECUTOR_PODS_LOCK")
  private val runningExecutorsToPods = new mutable.HashMap[String, Pod]
Contributor: nit: you could use @GuardedBy instead of the comment.
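As background for the nit: `javax.annotation.concurrent.GuardedBy` is documentation plus a hook for static analysis; it does not enforce locking at runtime. A minimal sketch with made-up names, assuming the JSR-305/JCIP annotations are on the classpath:

```scala
// Illustrative sketch: @GuardedBy records which lock protects a field so reviewers and
// static-analysis tools can check that every access happens inside that lock.
import javax.annotation.concurrent.GuardedBy
import scala.collection.mutable

class ExecutorRegistrySketch {
  private val lock = new Object

  @GuardedBy("lock")
  private val executorIdToPodName = new mutable.HashMap[String, String]

  def register(executorId: String, podName: String): Unit = lock.synchronized {
    executorIdToPodName.put(executorId, podName)
  }

  def podNameFor(executorId: String): Option[String] = lock.synchronized {
    executorIdToPodName.get(executorId)
  }
}
```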
  // Indexed by executor pod names and guarded by RUNNING_EXECUTOR_PODS_LOCK.
  // Indexed by executor pod names
  @GuardedBy("RUNNING_EXECUTOR_PODS_LOCK")
  private val runningPodsToExecutors = new mutable.HashMap[String, String]
  private val executorPodsByIPs = new ConcurrentHashMap[String, Pod]()
  private val podsWithKnownExitReasons = new ConcurrentHashMap[String, ExecutorExited]()
@@ -105,21 +108,44 @@ private[spark] class KubernetesClusterSchedulerBackend(
    override def run(): Unit = {
      handleDisconnectedExecutors()
      val executorsToAllocate = mutable.Map[String, Pod]()
      val currentTotalRegisteredExecutors = totalRegisteredExecutors.get
      val currentTotalExpectedExecutors = totalExpectedExecutors.get
      val currentNodeToLocalTaskCount = getNodesWithLocalTaskCounts
      if (currentTotalRegisteredExecutors < runningExecutorsToPods.size) {
        logDebug("Waiting for pending executors before scaling")
      } else if (currentTotalExpectedExecutors <= runningExecutorsToPods.size) {
        logDebug("Maximum allowed executor limit reached. Not scaling up further.")
      } else {
        val nodeToLocalTaskCount = getNodesWithLocalTaskCounts
        for (i <- 0 until math.min(
          currentTotalExpectedExecutors - runningExecutorsToPods.size, podAllocationSize)) {
          val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString
          val executorPod = executorPodFactory.createExecutorPod(
            executorId,
            applicationId(),
            driverUrl,
            conf.getExecutorEnv,
            driverPod,
            nodeToLocalTaskCount)
          executorsToAllocate(executorId) = executorPod
          logInfo(
            s"Requesting a new executor, total executors is now ${runningExecutorsToPods.size}")
        }
      }
      val allocatedExecutors = executorsToAllocate.mapValues { pod =>
        Utils.tryLog {
          kubernetesClient.pods().create(pod)
        }
      }
      RUNNING_EXECUTOR_PODS_LOCK.synchronized {
        if (totalRegisteredExecutors.get() < runningExecutorsToPods.size) {
          logDebug("Waiting for pending executors before scaling")
        } else if (totalExpectedExecutors.get() <= runningExecutorsToPods.size) {
          logDebug("Maximum allowed executor limit reached. Not scaling up further.")
        } else {
          val nodeToLocalTaskCount = getNodesWithLocalTaskCounts
          for (i <- 0 until math.min(
            totalExpectedExecutors.get - runningExecutorsToPods.size, podAllocationSize)) {
            val (executorId, pod) = allocateNewExecutorPod(nodeToLocalTaskCount)
            runningExecutorsToPods.put(executorId, pod)
            runningPodsToExecutors.put(pod.getMetadata.getName, executorId)
            logInfo(
              s"Requesting a new executor, total executors is now ${runningExecutorsToPods.size}")
          }
        allocatedExecutors.map {
          case (executorId, attemptedAllocatedExecutor) =>
            attemptedAllocatedExecutor.map { successfullyAllocatedExecutor =>
              runningExecutorsToPods.put(executorId, successfullyAllocatedExecutor)
              runningPodsToExecutors.put(
                successfullyAllocatedExecutor.getMetadata.getName, executorId)
            }
          }
        }
      }
    }
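The reworked allocator follows a "prepare under the lock, call the API outside the lock, record successes back under the lock" shape, so slow or failing pod-creation calls never block other users of RUNNING_EXECUTOR_PODS_LOCK. A minimal standalone sketch of that pattern, using scala.util.Try in place of Spark's Utils.tryLog and made-up names throughout:

```scala
// Sketch of "prepare under lock, do I/O outside the lock, record successes under lock".
// createRemote stands in for the Kubernetes client call; all names are illustrative.
import scala.collection.mutable
import scala.util.Try

class AllocatorSketch(createRemote: String => String) {
  private val lock = new Object
  private val running = mutable.HashMap[String, String]() // executorId -> remote resource name

  def allocate(wanted: Int): Unit = {
    // 1. Decide what to create while holding the lock (cheap, no network calls).
    val toCreate = lock.synchronized {
      (running.size until wanted).map(_.toString)
    }
    // 2. Perform the slow, fallible remote calls without holding the lock.
    val attempts = toCreate.map(id => id -> Try(createRemote(id)))
    // 3. Record only the successful creations back under the lock.
    lock.synchronized {
      attempts.foreach { case (id, attempt) =>
        attempt.foreach(resourceName => running.put(id, resourceName))
      }
    }
  }
}
```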
@@ -128,25 +154,25 @@ private[spark] class KubernetesClusterSchedulerBackend(
      // For each disconnected executor, synchronize with the loss reasons that may have been found
      // by the executor pod watcher. If the loss reason was discovered by the watcher,
      // inform the parent class with removeExecutor.
      disconnectedPodsByExecutorIdPendingRemoval.keys().asScala.foreach { case (executorId) =>
        val executorPod = disconnectedPodsByExecutorIdPendingRemoval.get(executorId)
        val knownExitReason = Option(podsWithKnownExitReasons.remove(
          executorPod.getMetadata.getName))
        knownExitReason.fold {
          removeExecutorOrIncrementLossReasonCheckCount(executorId)
        } { executorExited =>
          logWarning(s"Removing executor $executorId with loss reason " + executorExited.message)
          removeExecutor(executorId, executorExited)
          // We keep around executors that have exit conditions caused by the application. This
          // allows them to be debugged later on. Otherwise, mark them as to be deleted from the
          // the API server.
          if (!executorExited.exitCausedByApp) {
            logInfo(s"Executor $executorId failed because of a framework error.")
            deleteExecutorFromClusterAndDataStructures(executorId)
          } else {
            logInfo(s"Executor $executorId exited because of the application.")
      disconnectedPodsByExecutorIdPendingRemoval.asScala.foreach {
        case (executorId, executorPod) =>
          val knownExitReason = Option(podsWithKnownExitReasons.remove(
            executorPod.getMetadata.getName))
          knownExitReason.fold {
            removeExecutorOrIncrementLossReasonCheckCount(executorId)
          } { executorExited =>
            logWarning(s"Removing executor $executorId with loss reason " + executorExited.message)
            removeExecutor(executorId, executorExited)
            // We keep around executors that have exit conditions caused by the application. This
            // allows them to be debugged later on. Otherwise, mark them as to be deleted from the
            // the API server.
            if (!executorExited.exitCausedByApp) {
              logInfo(s"Executor $executorId failed because of a framework error.")
              deleteExecutorFromClusterAndDataStructures(executorId)
            } else {
              logInfo(s"Executor $executorId exited because of the application.")
            }
          }
        }
      }
    }
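The branching above relies on `Option.fold(ifEmpty)(ifDefined)`: the first block runs when no exit reason has been recorded yet, the second when the watcher has supplied one. A tiny illustration with invented values:

```scala
// Quick illustration (not the PR's code) of the Option.fold branching used above.
object OptionFoldSketch {
  def describeExit(maybeExitReason: Option[String], executorId: String): String =
    maybeExitReason.fold {
      s"No exit reason known yet for executor $executorId; will check again later."
    } { reason =>
      s"Removing executor $executorId with loss reason: $reason"
    }

  def main(args: Array[String]): Unit = {
    println(describeExit(None, "1"))
    println(describeExit(Some("OOMKilled"), "2"))
  }
}
```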
@@ -163,12 +189,17 @@ private[spark] class KubernetesClusterSchedulerBackend(
    def deleteExecutorFromClusterAndDataStructures(executorId: String): Unit = {
      disconnectedPodsByExecutorIdPendingRemoval.remove(executorId)
      executorReasonCheckAttemptCounts -= executorId
Contributor: Remove from podsWithKnownExitReasons?
      RUNNING_EXECUTOR_PODS_LOCK.synchronized {
      podsWithKnownExitReasons -= executorId
      val maybeExecutorPodToDelete = RUNNING_EXECUTOR_PODS_LOCK.synchronized {
        runningExecutorsToPods.remove(executorId).map { pod =>
          kubernetesClient.pods().delete(pod)
          runningPodsToExecutors.remove(pod.getMetadata.getName)
        }.getOrElse(logWarning(s"Unable to remove pod for unknown executor $executorId"))
          pod
        }.orElse {
          logWarning(s"Unable to remove pod for unknown executor $executorId")
          None
        }
      }
      maybeExecutorPodToDelete.foreach(pod => kubernetesClient.pods().delete(pod))
    }
  }
@@ -203,25 +234,23 @@ private[spark] class KubernetesClusterSchedulerBackend(
      // TODO investigate why Utils.tryLogNonFatalError() doesn't work in this context.
      // When using Utils.tryLogNonFatalError some of the code fails but without any logs or
      // indication as to why.
      try {
        RUNNING_EXECUTOR_PODS_LOCK.synchronized {
          runningExecutorsToPods.values.foreach(kubernetesClient.pods().delete(_))
      Utils.tryLogNonFatalError {
        val executorPodsToDelete = RUNNING_EXECUTOR_PODS_LOCK.synchronized {
          val runningExecutorPodsCopy = Seq(runningExecutorsToPods.values.toSeq: _*)
          runningExecutorsToPods.clear()
          runningPodsToExecutors.clear()
          runningExecutorPodsCopy
        }
        kubernetesClient.pods().delete(executorPodsToDelete: _*)
        executorPodsByIPs.clear()
        val resource = executorWatchResource.getAndSet(null)
        if (resource != null) {
          resource.close()
        }
Contributor: I am not very sure of the semantics of the watcher here - should we close the watcher before the executor deletes here?

Contributor: The watcher will receive a DELETE event for each deleted executor pod, and the event is handled in …

Contributor: Is the actual … ? Is this a correct assumption?

Contributor: What about … ? If this is the case, in the current code the watcher will not be notified of the deletes.

Contributor: Ah ok - looking it up a bit, I think we want the order to be reversed here. We should first close the watch to ensure we don't get any deleted events, then delete the pods themselves. We probably want to ensure the pods are deleted even if we fail to close the watch.

Contributor: Yes, it depends on whether we require the watcher to receive and act on the DELETE events in this case. If not, moving …

Contributor: Reversed the order in c386186.
      } catch {
        case e: Throwable => logError("Uncaught exception while shutting down controllers.", e)
      }
      try {
      Utils.tryLogNonFatalError {
        logInfo("Closing kubernetes client")
        kubernetesClient.close()
      } catch {
        case e: Throwable => logError("Uncaught exception closing Kubernetes client.", e)
      }
      }
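The thread above settles an ordering question: close the watch first so the backend does not react to DELETE events it triggered itself, but still attempt the pod deletions even if closing the watch fails. A minimal sketch of that shutdown ordering with placeholder types (not the fabric8 API):

```scala
// Sketch of the shutdown ordering discussed above: close the watch, then delete the pods,
// and isolate failures so one step cannot skip the other. Types and names are illustrative.
import java.io.Closeable
import scala.util.control.NonFatal

class ShutdownSketch(watch: Closeable, deleteAllPods: () => Unit) {
  def stop(): Unit = {
    // 1. Stop watching before deleting, so the watcher never sees the deletes we trigger.
    try {
      watch.close()
    } catch {
      case NonFatal(e) => println(s"Failed to close the watch: ${e.getMessage}")
    }
    // 2. Delete the executor pods regardless of whether the watch closed cleanly.
    try {
      deleteAllPods()
    } catch {
      case NonFatal(e) => println(s"Failed to delete executor pods: ${e.getMessage}")
    }
  }
}
```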
@@ -231,7 +260,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
     */
    private def getNodesWithLocalTaskCounts() : Map[String, Int] = {
      val nodeToLocalTaskCount = mutable.Map[String, Int]() ++
        KubernetesClusterSchedulerBackend.this.synchronized {
        synchronized {
          hostToLocalTaskCount
        }
      for (pod <- executorPodsByIPs.values().asScala) {

@@ -247,58 +276,31 @@ private[spark] class KubernetesClusterSchedulerBackend(
      nodeToLocalTaskCount.toMap[String, Int]
    }
    /**
     * Allocates a new executor pod
     *
     * @param nodeToLocalTaskCount A map of K8s cluster nodes to the number of tasks that could
     *                             benefit from data locality if an executor launches on the cluster
     *                             node.
     * @return A tuple of the new executor name and the Pod data structure.
     */
    private def allocateNewExecutorPod(nodeToLocalTaskCount: Map[String, Int]): (String, Pod) = {
      val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString
      val executorPod = executorPodFactory.createExecutorPod(
        executorId,
        applicationId(),
        driverUrl,
        conf.getExecutorEnv,
        driverPod,
        nodeToLocalTaskCount)
      try {
        (executorId, kubernetesClient.pods.create(executorPod))
      } catch {
        case throwable: Throwable =>
          logError("Failed to allocate executor pod.", throwable)
          throw throwable
      }
    }
    override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] {
      totalExpectedExecutors.set(requestedTotal)
      true
    }
    override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] {
      val podsToDelete = mutable.Buffer[Pod]()
      RUNNING_EXECUTOR_PODS_LOCK.synchronized {
        for (executor <- executorIds) {
          val maybeRemovedExecutor = runningExecutorsToPods.remove(executor)
          maybeRemovedExecutor.foreach { executorPod =>
            kubernetesClient.pods().delete(executorPod)
            disconnectedPodsByExecutorIdPendingRemoval.put(executor, executorPod)
            runningPodsToExecutors.remove(executorPod.getMetadata.getName)
            podsToDelete += executorPod
          }
          if (maybeRemovedExecutor.isEmpty) {
            logWarning(s"Unable to remove pod for unknown executor $executor")
          }
        }
      }
      kubernetesClient.pods().delete(podsToDelete: _*)
      true
    }
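doKillExecutors applies the same lock discipline: bookkeeping is updated under RUNNING_EXECUTOR_PODS_LOCK, and the pods are then removed with a single batched delete outside the lock instead of one API call per executor. A standalone sketch with invented types:

```scala
// Sketch of the doKillExecutors shape: mutate bookkeeping under the lock, batch the
// actual deletions into one call performed outside it. deletePods is a stand-in.
import scala.collection.mutable

class KillExecutorsSketch(deletePods: Seq[String] => Unit) {
  private val lock = new Object
  private val runningExecutorsToPods = mutable.HashMap[String, String]() // executorId -> pod name

  def killExecutors(executorIds: Seq[String]): Boolean = {
    val podsToDelete = mutable.Buffer[String]()
    lock.synchronized {
      executorIds.foreach { executorId =>
        runningExecutorsToPods.remove(executorId) match {
          case Some(podName) => podsToDelete += podName
          case None => println(s"Unable to remove pod for unknown executor $executorId")
        }
      }
    }
    deletePods(podsToDelete.toSeq) // single batched call, outside the lock
    true
  }
}
```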
    def getExecutorPodByIP(podIP: String): Option[Pod] = {
      // Note: Per https://github.com/databricks/scala-style-guide#concurrency, we don't
      // want to be switching to scala.collection.concurrent.Map on
      // executorPodsByIPs.
      val pod = executorPodsByIPs.get(podIP)
      Option(pod)
    }
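The `Option(...)` wrapper above converts the Java null that ConcurrentHashMap.get returns on a miss into None, which is why no switch to scala.collection.concurrent.Map is needed. A tiny illustration with made-up keys:

```scala
// Option(null) is None, so callers never see the Java null returned by get() on a miss.
import java.util.concurrent.ConcurrentHashMap

object NullSafeLookupSketch {
  private val podsByIp = new ConcurrentHashMap[String, String]()

  def podForIp(ip: String): Option[String] = Option(podsByIp.get(ip))

  def main(args: Array[String]): Unit = {
    podsByIp.put("10.0.0.7", "spark-exec-1")
    println(podForIp("10.0.0.7")) // Some(spark-exec-1)
    println(podForIp("10.0.0.8")) // None
  }
}
```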
nit: