25 commits
fb5b9ed
initial architecture for PySpark w/o dockerfile work
ifilonenko Apr 16, 2018
b7b3db0
included entrypoint logic
ifilonenko Apr 17, 2018
98cef8c
satisfying integration tests
ifilonenko Apr 18, 2018
dc670dc
end-to-end working pyspark
ifilonenko Apr 18, 2018
eabe4b9
Merge pull request #1 from ifilonenko/py-spark
ifilonenko Apr 18, 2018
8d3debb
resolved comments and fixed --pyfiles issue and allowed for python2 o…
ifilonenko May 2, 2018
91e2a2c
Merge pull request #2 from ifilonenko/py-spark
ifilonenko May 2, 2018
5761ee8
Merge branch 'master' of https://github.com/ifilonenko/spark
ifilonenko May 2, 2018
98cc044
restructured step based pattern to resolve comments
ifilonenko May 7, 2018
678d381
Merge pull request #3 from ifilonenko/py-spark
ifilonenko May 7, 2018
bf738dc
resolved comments
ifilonenko May 8, 2018
c59068d
Merge pull request #4 from ifilonenko/py-spark
ifilonenko May 8, 2018
0344f90
resolving style issues
ifilonenko May 9, 2018
306f3ed
Merge pull request #5 from ifilonenko/py-spark
ifilonenko May 9, 2018
f2fc53e
resolved commits
ifilonenko May 13, 2018
6f66d60
merge conflicts
ifilonenko May 13, 2018
914ff75
resolve rebase conflicts
ifilonenko May 11, 2018
d400607
import statements refactoring
ifilonenko May 13, 2018
72953a3
Merge branch 'py-spark'
ifilonenko May 13, 2018
7bedeb6
resolved comments
ifilonenko May 31, 2018
1801e96
merge conflicts
ifilonenko May 31, 2018
24a704e
setIfMissing
ifilonenko Jun 1, 2018
6a6d69d
added e2e tests on --py-files and inclusion of docs on config values
ifilonenko Jun 7, 2018
ab92913
style issues
ifilonenko Jun 7, 2018
a61d897
resolve comments on docs and addition of unit test
ifilonenko Jun 8, 2018
included entrypoint logic
ifilonenko committed Apr 17, 2018
commit b7b3db0abfbf425120fa21cc61e603c5d766f8af
23 changes: 17 additions & 6 deletions bin/docker-image-tool.sh
@@ -63,12 +63,20 @@ function build {
if [ ! -d "$IMG_PATH" ]; then
error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
fi

local DOCKERFILE=${DOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
local BINDING_BUILD_ARGS=(
--build-arg
base_img=$(image_ref spark)
)
local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}

docker build "${BUILD_ARGS[@]}" \
-t $(image_ref spark) \
-f "$DOCKERFILE" .
-f "$BASEDOCKERFILE" .

docker build "${BINDING_BUILD_ARGS[@]}" \
-t $(image_ref spark-py) \
-f "$PYDOCKERFILE" .
}

function push {
@@ -86,7 +94,8 @@ Commands:
push Push a pre-built image to a registry. Requires a repository address to be provided.

Options:
-f file Dockerfile to build. By default builds the Dockerfile shipped with Spark.
-f file Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
-p file Dockerfile with Python baked in. By default builds the Dockerfile shipped with Spark.
Contributor: One (future) concern is how we would handle the overlay with both Python and R at the same time.

-r repo Repository address.
-t tag Tag to apply to the built image, or to identify the image to be pushed.
-m Use minikube's Docker daemon.
@@ -116,12 +125,14 @@ fi

REPO=
TAG=
DOCKERFILE=
BASEDOCKERFILE=
PYDOCKERFILE=
while getopts f:p:mr:t: option
do
case "${option}"
in
f) DOCKERFILE=${OPTARG};;
f) BASEDOCKERFILE=${OPTARG};;
p) PYDOCKERFILE=${OPTARG};;
r) REPO=${OPTARG};;
t) TAG=${OPTARG};;
m)
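For illustration only, a hypothetical invocation of the updated script might look like the following. The repository, tag, and Dockerfile path are placeholders; -p is only needed when overriding the default spark/bindings/python/Dockerfile location, and image_ref is assumed to resolve to <repo>/<name>:<tag> as elsewhere in the script.

    # Builds both images: <repo>/spark:<tag> (JVM) and <repo>/spark-py:<tag> (Python binding).
    ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.4.0 \
        -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build

    # Push the built image(s) to the registry named by -r.
    ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.4.0 push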
BasicDriverFeatureStep.scala
@@ -25,6 +25,7 @@ import org.apache.spark.SparkException
import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, KubernetesUtils, SparkPod}
import org.apache.spark.deploy.k8s.Config._
import org.apache.spark.deploy.k8s.Constants._
import org.apache.spark.deploy.k8s.submit._
import org.apache.spark.internal.config._
import org.apache.spark.launcher.SparkLauncher

@@ -44,6 +45,10 @@ private[spark] class BasicDriverFeatureStep(
private val driverCpuCores = conf.get("spark.driver.cores", "1")
private val driverLimitCores = conf.get(KUBERNETES_DRIVER_LIMIT_CORES)

private val driverDockerContainer = conf.roleSpecificConf.mainAppResource.map {
case JavaMainAppResource(_) => "driver"
case PythonMainAppResource(_) => "driver-py"
}.getOrElse(throw new SparkException("Must specify a JVM or Python Resource"))
Contributor Author: Should I therefore not throw an error here @mccheah and move this logic into the steps?

// Memory settings
private val driverMemoryMiB = conf.get(DRIVER_MEMORY)
private val memoryOverheadMiB = conf
@@ -89,7 +94,7 @@ private[spark] class BasicDriverFeatureStep(
.addToRequests("memory", driverMemoryQuantity)
.addToLimits("memory", driverMemoryQuantity)
.endResources()
.addToArgs("driver")
.addToArgs(driverDockerContainer)
.addToArgs("--properties-file", SPARK_CONF_PATH)
.addToArgs("--class", conf.roleSpecificConf.mainClass)
// The user application jar is merged into the spark.jars list and managed through that
BasicDriverFeatureStepSuite.scala
@@ -24,6 +24,8 @@ import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod}
import org.apache.spark.deploy.k8s.Config._
import org.apache.spark.deploy.k8s.Constants._
import org.apache.spark.deploy.k8s.submit.JavaMainAppResource
import org.apache.spark.deploy.k8s.submit.PythonMainAppResource

class BasicDriverFeatureStepSuite extends SparkFunSuite {

@@ -33,8 +35,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
private val CONTAINER_IMAGE_PULL_POLICY = "IfNotPresent"
private val APP_NAME = "spark-test"
private val MAIN_CLASS = "org.apache.spark.examples.SparkPi"
private val PYTHON_MAIN_CLASS = "example.py"
private val EXAMPLE_PYTHON_FILES = Seq("example2.py", "example3.py")
private val PY_MAIN_CLASS = "org.apache.spark.deploy.PythonRunner"
private val APP_ARGS = Array("arg1", "arg2", "\"arg 3\"")
private val CUSTOM_ANNOTATION_KEY = "customAnnotation"
private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue"
@@ -62,7 +63,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
val kubernetesConf = KubernetesConf(
sparkConf,
KubernetesDriverSpecificConf(
None,
Some(JavaMainAppResource("")),
APP_NAME,
MAIN_CLASS,
APP_ARGS),
Expand Down Expand Up @@ -112,7 +113,6 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
assert(driverPodMetadata.getLabels.asScala === DRIVER_LABELS)
assert(driverPodMetadata.getAnnotations.asScala === DRIVER_ANNOTATIONS)
assert(configuredPod.pod.getSpec.getRestartPolicy === "Never")

val expectedSparkConf = Map(
KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod",
"spark.app.id" -> APP_ID,
@@ -121,6 +121,47 @@
assert(featureStep.getAdditionalPodSystemProperties() === expectedSparkConf)
}

test("Check appropriate entrypoint rerouting for various bindings") {
val sparkConf = new SparkConf()
.set(org.apache.spark.internal.config.DRIVER_MEMORY.key, "4g")
.set(CONTAINER_IMAGE, "spark-driver:latest")
val javaKubernetesConf = KubernetesConf(
sparkConf,
KubernetesDriverSpecificConf(
Some(JavaMainAppResource("")),
APP_NAME,
PY_MAIN_CLASS,
APP_ARGS),
RESOURCE_NAME_PREFIX,
APP_ID,
DRIVER_LABELS,
DRIVER_ANNOTATIONS,
Map.empty,
DRIVER_ENVS,
Seq.empty[String])
val pythonKubernetesConf = KubernetesConf(
sparkConf,
KubernetesDriverSpecificConf(
Some(PythonMainAppResource("")),
APP_NAME,
PY_MAIN_CLASS,
APP_ARGS),
RESOURCE_NAME_PREFIX,
APP_ID,
DRIVER_LABELS,
DRIVER_ANNOTATIONS,
Map.empty,
DRIVER_ENVS,
Seq.empty[String])
val javaFeatureStep = new BasicDriverFeatureStep(javaKubernetesConf)
val pythonFeatureStep = new BasicDriverFeatureStep(pythonKubernetesConf)
val basePod = SparkPod.initialPod()
val configuredJavaPod = javaFeatureStep.configurePod(basePod)
val configuredPythonPod = pythonFeatureStep.configurePod(basePod)
assert(configuredJavaPod.container.getArgs.get(0) === "driver")
assert(configuredPythonPod.container.getArgs.get(0) === "driver-py")
}

test("Additional system properties resolve jars and set cluster-mode confs.") {
val allJars = Seq("local:///opt/spark/jar1.jar", "hdfs:///opt/spark/jar2.jar")
val allFiles = Seq("https://localhost:9000/file1.txt", "local:///opt/spark/file2.txt")
@@ -132,7 +173,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
val kubernetesConf = KubernetesConf(
sparkConf,
KubernetesDriverSpecificConf(
None,
Some(JavaMainAppResource("")),
APP_NAME,
MAIN_CLASS,
APP_ARGS),
New file: spark/bindings/python/Dockerfile
@@ -0,0 +1,33 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

ARG base_img
FROM $base_img
WORKDIR /
COPY python /opt/spark/python
RUN apk add --no-cache python && \
python -m ensurepip && \
rm -r /usr/lib/python*/ensurepip && \
Contributor: Can we add a comment about why this part?

pip install --upgrade pip setuptools && \
Member: this goes to python2 only, I think?

Contributor Author: Correct. I would love recommendations on dependency management with regard to pip, as it's tricky to allow for both pip and pip3 installation, unless I use two separate virtual environments for dependency management.

Contributor: You can run both pip and pip3 with the same packages and they'll get installed in different directories and shouldn't stomp on each other. That being said, long-term venvs are probably the way we want to go, but as we've discussed those are non-trivial and should go in a second PR.

Contributor: ping

Contributor Author: I will include pip3.6 installation for now until we figure out a long-term venv solution in the next PR

rm -r /root/.cache
Contributor: Is this just being done for space reasons?

Contributor Author: Yes

ENV PYTHON_VERSION 2.7.13
Contributor: If we set this, are we implicitly imposing a contract on the base image to have this particular version of python installed?

Contributor Author: That is what I brought up in the PR description, and why this is still a WIP. I need to investigate the proper way to determine whether we ship these containers with Python 2 or Python 3.

Member: in some OSes, python and python3 symlink to the installed versions of Python 2.x and 3.x respectively; is that a better approach than hardcoding the version number?

Contributor: So I think it might make sense to build the container with both 2 & 3, since the container might be built by a vendor or cluster administrator and then used by a variety of people. What do folks think?

As for figuring out the env, if we wanted to do it that way, we can call the current user's python and ask it for its version information (based on the Spark Python environment variables).

Contributor: I think a canonical container should include both. My instinct is that a user should be able to "force" the use of one or the other. If someone is invoking spark-submit in cluster-mode, with a supplied python file, some kind of CLI argument (--conf or otherwise) seems like the only totally foolproof way to identify that for the eventual pod construction, but maybe there is a better way?

Member: perhaps re-use PYSPARK_PYTHON?

ENV PYSPARK_PYTHON python
ENV PYSPARK_DRIVER_PYTHON python
ENV PYTHONPATH ${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:${PYTHONPATH}
Contributor: We're going to need to mention that the Py4J zip file needs to be updated here as well :( Also an open question whether we want the PySpark.zip file in here instead of python/, and, if we're trying to make "slim" images, whether we want to delete that zip file.


WORKDIR /opt/spark/work-dir
ENTRYPOINT [ "/opt/entrypoint.sh" ]
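A minimal sketch of the reviewers' "ship both interpreters" suggestion, mirroring the RUN step above; the python3 Alpine package name and the pip/pip3 pairing are assumptions and not part of this commit:

    # Illustrative only: install Python 2 and Python 3 side by side.
    RUN apk add --no-cache python python3 && \
        python -m ensurepip && \
        python3 -m ensurepip && \
        # ensurepip is only needed for bootstrapping; drop it to keep the image small
        rm -r /usr/lib/python*/ensurepip && \
        pip install --upgrade pip setuptools && \
        pip3 install --upgrade pip setuptools && \
        rm -r /root/.cache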
entrypoint.sh
@@ -62,6 +62,14 @@ case "$SPARK_K8S_CMD" in
"$@"
)
;;
driver-py)
CMD=(
"$SPARK_HOME/bin/spark-submit"
--conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
--deploy-mode client
$PYSPARK_PRIMARY $PYSPARK_FILES "$@"
)
;;

executor)
CMD=(
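End-to-end, a hypothetical cluster-mode submission against the spark-py image would exercise the driver-py branch above. The master URL, image name, and file paths below are placeholders, and the PYSPARK_PYTHON overrides only illustrate the reviewers' "--conf to force an interpreter" idea using config names assumed from the Kubernetes docs; none of that is wired up by this commit itself.

    bin/spark-submit \
        --master k8s://https://<k8s-apiserver-host>:<port> \
        --deploy-mode cluster \
        --name pyspark-pi \
        --conf spark.kubernetes.container.image=docker.io/myrepo/spark-py:v2.4.0 \
        # Hypothetical: force a specific interpreter (only meaningful if the image ships it).
        --conf spark.kubernetes.driverEnv.PYSPARK_PYTHON=python3 \
        --conf spark.executorEnv.PYSPARK_PYTHON=python3 \
        --py-files local:///opt/spark/examples/src/main/python/sort.py \
        local:///opt/spark/examples/src/main/python/pi.py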