From 250cb955efe9c9bdf24be6cefcfb1dfa71d39bc4 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 4 Aug 2014 13:12:15 -0700 Subject: [PATCH 01/53] Do not ignore spark.driver.extra* for client mode --- .../scala/org/apache/spark/deploy/SparkSubmit.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 318509a67a36..430bf27c1c8d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -171,6 +171,12 @@ object SparkSubmit { OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.jars, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), + OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + sysProp = "spark.driver.extraClassPath"), + OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + sysProp = "spark.driver.extraJavaOptions"), + OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + sysProp = "spark.driver.extraLibraryPath"), // Standalone cluster only OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"), @@ -195,12 +201,6 @@ object SparkSubmit { OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"), // Other options - OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraClassPath"), - OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraJavaOptions"), - OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraLibraryPath"), OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES, sysProp = "spark.executor.memory"), OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, From a2ab1b0a3a976e361ea86fc20fc7083e7f9885ca Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:32:05 -0700 Subject: [PATCH 02/53] Parse spark.driver.extra* in bash --- bin/spark-submit | 32 +++++++++++++++++++-- bin/spark-submit.new | 67 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) create mode 100755 bin/spark-submit.new diff --git a/bin/spark-submit b/bin/spark-submit index 9e7cecedd032..70d4d9c06893 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -25,6 +25,8 @@ while (($#)); do DEPLOY_MODE=$2 elif [ "$1" = "--driver-memory" ]; then DRIVER_MEMORY=$2 + elif [ "$1" = "--properties-file" ]; then + PROPERTIES_FILE=$2 elif [ "$1" = "--driver-library-path" ]; then export SPARK_SUBMIT_LIBRARY_PATH=$2 elif [ "$1" = "--driver-class-path" ]; then @@ -36,9 +38,35 @@ while (($#)); do done DEPLOY_MODE=${DEPLOY_MODE:-"client"} +PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} -if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then - export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY +# For client mode, the driver will be launched in the JVM that launches +# SparkSubmit, so we need to handle the class paths, java options, and +# memory pre-emptively in bash. Otherwise, it will be too late by the +# time the JVM has started. 
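+#
+# As an illustration only (these example lines are not in the shipped
+# template), a spark-defaults.conf may use either delimiter form:
+#
+#   spark.driver.extraClassPath   /opt/libs/extra.jar
+#   spark.driver.extraClassPath=/opt/libs/extra.jar
+#
+# Both must reduce to just "/opt/libs/extra.jar" once the key, the
+# delimiter, and any surrounding double quotes are stripped below.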
+ +if [ $DEPLOY_MODE == "client" ]; then + if [ -n "$DRIVER_MEMORY" ]; then + export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY + fi + # We parse the default properties file here, assuming each line is + # a key value pair delimited either by white space or "=" sign. All + # spark.driver.* configs must be processed now before it's too late. + if [ -f "$PROPERTIES_FILE" ]; then + DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" + DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" + DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" + # Remove "=" sign and double quotes around the value, if any + DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | \ + sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | \ + sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | \ + sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" + export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" + export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + fi fi exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}" diff --git a/bin/spark-submit.new b/bin/spark-submit.new new file mode 100755 index 000000000000..eb68a5c72e95 --- /dev/null +++ b/bin/spark-submit.new @@ -0,0 +1,67 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +SPARK_HOME="$(cd `dirname $0`/..; pwd)" +ORIG_ARGS=("$@") + +while (($#)); do + if [ "$1" = "--deploy-mode" ]; then + DEPLOY_MODE=$2 + elif [ "$1" = "--driver-memory" ]; then + DRIVER_MEMORY=$2 + elif [ "$1" = "--driver-library-path" ]; then + SPARK_SUBMIT_LIBRARY_PATH=$2 + elif [ "$1" = "--driver-class-path" ]; then + SPARK_SUBMIT_CLASSPATH=$2 + elif [ "$1" = "--driver-java-options" ]; then + SPARK_SUBMIT_OPTS=$2 + elif [ "$1" = "--properties-file" ]; then + PROPERTIES_FILE=$2 + fi + shift +done + +DEPLOY_MODE=${DEPLOY_MODE:-"client"} + +if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then + SPARK_DRIVER_MEMORY=$DRIVER_MEMORY +fi + +PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} +if [ -f $PROPERTIES_FILE ]; then + DRIVER_EXTRA_JAVA_OPTIONS="spark.driver.extraJavaOptions" + DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" + DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" + DRIVER_EXTRA_JAVA_OPTIONS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTIONS" | sed "s/$DRIVER_EXTRA_JAVA_OPTIONS//g" | sed "s/^=//g") + DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g") + DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g") +fi + +echo "DEPLOY_MODE = $DEPLOY_MODE" +echo "DRIVER_MEMORY = $DRIVER_MEMORY" +echo "SPARK_DRIVER_MEMORY = $SPARK_DRIVER_MEMORY" +echo "SPARK_SUBMIT_LIBRARY_PATH = $SPARK_SUBMIT_LIBRARY_PATH" +echo "SPARK_SUBMIT_CLASSPATH = $SPARK_SUBMIT_CLASSPATH" +echo "SPARK_SUBMIT_OPTS = $SPARK_SUBMIT_OPTS" +echo "DRIVER_EXTRA_JAVA_OPTIONS = $DRIVER_EXTRA_JAVA_OPTIONS" +echo "DRIVER_EXTRA_CLASSPATH = $DRIVER_EXTRA_CLASSPATH" +echo "DRIVER_EXTRA_LIBRARY_PATH = $DRIVER_EXTRA_LIBRARY_PATH" + +echo "exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"" + From 0025474d7412607e1ca620d1942393d78a28b7f8 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:35:16 -0700 Subject: [PATCH 03/53] Revert SparkSubmit handling of --driver-* options for only cluster mode --- .../org/apache/spark/deploy/SparkSubmit.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 430bf27c1c8d..0fb94ebe8e45 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -171,12 +171,6 @@ object SparkSubmit { OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.jars, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), - OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - sysProp = "spark.driver.extraClassPath"), - OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - sysProp = "spark.driver.extraJavaOptions"), - OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - sysProp = "spark.driver.extraLibraryPath"), // Standalone cluster only OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"), @@ -207,6 +201,15 @@ object SparkSubmit { sysProp = "spark.cores.max"), OptionAssigner(args.files, LOCAL | STANDALONE | 
MESOS, ALL_DEPLOY_MODES, sysProp = "spark.files") + + // Only process driver specific options for cluster mode here, + // because they have already been processed in bash for client mode + OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER, + sysProp = "spark.driver.extraClassPath"), + OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, + sysProp = "spark.driver.extraJavaOptions"), + OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, + sysProp = "spark.driver.extraLibraryPath"), ) // In client mode, launch the application main class directly From 75ee6b4a1c6df1a911cf62ded81e4eabb737b345 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:36:35 -0700 Subject: [PATCH 04/53] Remove accidentally added file --- bin/spark-submit.new | 67 -------------------------------------------- 1 file changed, 67 deletions(-) delete mode 100755 bin/spark-submit.new diff --git a/bin/spark-submit.new b/bin/spark-submit.new deleted file mode 100755 index eb68a5c72e95..000000000000 --- a/bin/spark-submit.new +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -SPARK_HOME="$(cd `dirname $0`/..; pwd)" -ORIG_ARGS=("$@") - -while (($#)); do - if [ "$1" = "--deploy-mode" ]; then - DEPLOY_MODE=$2 - elif [ "$1" = "--driver-memory" ]; then - DRIVER_MEMORY=$2 - elif [ "$1" = "--driver-library-path" ]; then - SPARK_SUBMIT_LIBRARY_PATH=$2 - elif [ "$1" = "--driver-class-path" ]; then - SPARK_SUBMIT_CLASSPATH=$2 - elif [ "$1" = "--driver-java-options" ]; then - SPARK_SUBMIT_OPTS=$2 - elif [ "$1" = "--properties-file" ]; then - PROPERTIES_FILE=$2 - fi - shift -done - -DEPLOY_MODE=${DEPLOY_MODE:-"client"} - -if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then - SPARK_DRIVER_MEMORY=$DRIVER_MEMORY -fi - -PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} -if [ -f $PROPERTIES_FILE ]; then - DRIVER_EXTRA_JAVA_OPTIONS="spark.driver.extraJavaOptions" - DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" - DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" - DRIVER_EXTRA_JAVA_OPTIONS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTIONS" | sed "s/$DRIVER_EXTRA_JAVA_OPTIONS//g" | sed "s/^=//g") - DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g") - DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g") -fi - -echo "DEPLOY_MODE = $DEPLOY_MODE" -echo "DRIVER_MEMORY = $DRIVER_MEMORY" -echo "SPARK_DRIVER_MEMORY = $SPARK_DRIVER_MEMORY" -echo "SPARK_SUBMIT_LIBRARY_PATH = $SPARK_SUBMIT_LIBRARY_PATH" -echo "SPARK_SUBMIT_CLASSPATH = $SPARK_SUBMIT_CLASSPATH" -echo "SPARK_SUBMIT_OPTS = $SPARK_SUBMIT_OPTS" -echo "DRIVER_EXTRA_JAVA_OPTIONS = $DRIVER_EXTRA_JAVA_OPTIONS" -echo "DRIVER_EXTRA_CLASSPATH = $DRIVER_EXTRA_CLASSPATH" -echo "DRIVER_EXTRA_LIBRARY_PATH = $DRIVER_EXTRA_LIBRARY_PATH" - -echo "exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"" - From 8843562bb6883a092e6f4032f05fe01932db086b Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:37:11 -0700 Subject: [PATCH 05/53] Fix compilation issues... 
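(Specifically: the previous commit left the reshuffled OptionAssigner
argument list syntactically invalid. The comma after the "spark.files"
assigner went missing, and a trailing comma was left dangling after the
last "spark.driver.extraLibraryPath" assigner, which the Scala compiler
rejects in an argument list. The diff below restores both.)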
--- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0fb94ebe8e45..f8cdbc3c392b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -200,7 +200,7 @@ object SparkSubmit { OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.cores.max"), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES, - sysProp = "spark.files") + sysProp = "spark.files"), // Only process driver specific options for cluster mode here, // because they have already been processed in bash for client mode @@ -209,7 +209,7 @@ object SparkSubmit { OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, sysProp = "spark.driver.extraJavaOptions"), OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraLibraryPath"), + sysProp = "spark.driver.extraLibraryPath") ) // In client mode, launch the application main class directly From 98dd8e327ac5940d8a4a3820027645ee4b88178e Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:39:07 -0700 Subject: [PATCH 06/53] Add warning if properties file does not exist --- bin/spark-submit | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/spark-submit b/bin/spark-submit index 70d4d9c06893..3f4a3840b4d9 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -53,6 +53,7 @@ if [ $DEPLOY_MODE == "client" ]; then # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. if [ -f "$PROPERTIES_FILE" ]; then + echo "Using properties file $PROPERTIES_FILE" DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" @@ -66,6 +67,8 @@ if [ $DEPLOY_MODE == "client" ]; then export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + else + echo "Warning: properties file $PROPERTIES_FILE does not exist." fi fi From 130f295e085d95e8205d882174a5667d29b3b1f2 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 22:12:28 -0700 Subject: [PATCH 07/53] Handle spark.driver.memory too --- bin/spark-submit | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index 3f4a3840b4d9..e86032e006dd 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -46,30 +46,36 @@ PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} # time the JVM has started. if [ $DEPLOY_MODE == "client" ]; then - if [ -n "$DRIVER_MEMORY" ]; then - export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY - fi # We parse the default properties file here, assuming each line is # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. 
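  # (Sketch of the intended precedence, assuming both sources are set: a
  # file line "spark.driver.memory 2g" combined with "--driver-memory 4g"
  # on the command line should leave SPARK_DRIVER_MEMORY=4g, since the
  # command-line value is favored in the fallback further down.)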
if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE" + DRIVER_MEMORY_CONF="spark.driver.memory" DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" # Remove "=" sign and double quotes around the value, if any - DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | \ - sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") - DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | \ - sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") - DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | \ - sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + DRIVER_MEMORY_CONF=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_MEMORY_CONF" | sed "s/$DRIVER_MEMORY_CONF//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" else echo "Warning: properties file $PROPERTIES_FILE does not exist." fi + + # Favor command line memory over config memory + DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"} + if [ -n "$DRIVER_MEMORY" ]; then + export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY + fi fi exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}" From 4edcaa8027961578246c5cfa8a2d82a92a031265 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 23:17:56 -0700 Subject: [PATCH 08/53] Redirect stdout to stderr for python --- bin/spark-submit | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index e86032e006dd..d56696f90783 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -50,7 +50,7 @@ if [ $DEPLOY_MODE == "client" ]; then # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. 
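  # (A note on the redirection below: "1>&2" points file descriptor 1,
  # stdout, at stderr, so e.g. `echo "diagnostic" 1>&2` stays visible on
  # a terminal but out of any pipeline; presumably the Python launcher
  # captures this script's stdout, hence the change.)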
if [ -f "$PROPERTIES_FILE" ]; then - echo "Using properties file $PROPERTIES_FILE" + echo "Using properties file $PROPERTIES_FILE" 1>&2 DRIVER_MEMORY_CONF="spark.driver.memory" DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" @@ -68,7 +68,7 @@ if [ $DEPLOY_MODE == "client" ]; then export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" else - echo "Warning: properties file $PROPERTIES_FILE does not exist." + echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2 fi # Favor command line memory over config memory From e5cfb4627df353125f8f2382bad4bb35aa03c7fb Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 13:26:04 -0700 Subject: [PATCH 09/53] Collapse duplicate code + fix potential whitespace issues --- bin/spark-submit | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index d56696f90783..46689975d1fe 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -50,25 +50,28 @@ if [ $DEPLOY_MODE == "client" ]; then # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. if [ -f "$PROPERTIES_FILE" ]; then - echo "Using properties file $PROPERTIES_FILE" 1>&2 - DRIVER_MEMORY_CONF="spark.driver.memory" - DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" - DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" - DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" - # Remove "=" sign and double quotes around the value, if any - DRIVER_MEMORY_CONF=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_MEMORY_CONF" | sed "s/$DRIVER_MEMORY_CONF//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") - DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") - DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") - DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + echo "Using properties file $PROPERTIES_FILE." 
1>&2 + + # Parse the value of the given config + # This removes the "=" sign, whitespace, and double quotes around the value (if any) + parse_config() { + result=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$1" | \ + sed "s/$1//g" | \ + sed "s/^[[:space:]]*=//g" | \ + sed "s/^[[:space:]]*\"\(.*\)\"[[:space:]]*$/\1/g" | \ + sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ + ) + } + parse_config "spark.driver.memory"; DRIVER_MEMORY_CONF="$result" + parse_config "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$result" + parse_config "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$result" + parse_config "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$result" + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" else - echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2 + echo "Warning: properties file $PROPERTIES_FILE does not exist!" 1>&2 fi # Favor command line memory over config memory From ef12f74b9b7e7edcefb6b82cb53de3eccbf0d9ad Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 13:31:32 -0700 Subject: [PATCH 10/53] Minor formatting --- bin/spark-submit | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/spark-submit b/bin/spark-submit index 46689975d1fe..faf09f333106 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -55,7 +55,9 @@ if [ $DEPLOY_MODE == "client" ]; then # Parse the value of the given config # This removes the "=" sign, whitespace, and double quotes around the value (if any) parse_config() { - result=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$1" | \ + result=$( \ + sed "/^#/ d" "$PROPERTIES_FILE" | \ + grep "$1" | \ sed "s/$1//g" | \ sed "s/^[[:space:]]*=//g" | \ sed "s/^[[:space:]]*\"\(.*\)\"[[:space:]]*$/\1/g" | \ From fa2136ed14145f8fa18f40e6e1a3a776048c01ab Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 22:23:44 -0700 Subject: [PATCH 11/53] Escape Java options + parse java properties files properly --- bin/spark-class | 35 +++++++++++++++++++++++++++++++++-- bin/spark-submit | 11 +++++------ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 3f6beca5becf..829f4cef2b27 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -146,10 +146,41 @@ if $cygwin; then fi export CLASSPATH +# Properly escape java options, dealing with whitespace, double quotes and backslashes +# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS +escape_java_options() { + ESCAPED_JAVA_OPTS=() # return value + option_buffer="" # buffer for collecting parts of an option + opened_quotes=0 # whether we are expecting a closing double quotes + for word in $1; do + contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") + if [ -n "$contains_quote" ]; then + # Flip the bit + opened_quotes=$(((opened_quotes + 1) % 2)) + fi + if [[ $opened_quotes == 0 ]]; then + ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") + option_buffer="" + else + option_buffer="$option_buffer $word" + fi + done + # Something is wrong if we ended with open double quotes + if [[ $opened_quotes == 1 ]]; then + echo "Java options parse error! Expecting closing double quotes." 
1>&2 + exit 1 + fi +} + +escape_java_options "$JAVA_OPTS" +for option in "${ESCAPED_JAVA_OPTS[@]}"; do + echo "$option" +done + if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then echo -n "Spark Command: " 1>&2 - echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2 + echo "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" 1>&2 echo -e "========================================\n" 1>&2 fi -exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" +exec "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" diff --git a/bin/spark-submit b/bin/spark-submit index faf09f333106..d48c48a5244a 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -52,15 +52,14 @@ if [ $DEPLOY_MODE == "client" ]; then if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE." 1>&2 - # Parse the value of the given config - # This removes the "=" sign, whitespace, and double quotes around the value (if any) + # Parse the value of the given config according to the specifications outlined in + # http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader) parse_config() { result=$( \ - sed "/^#/ d" "$PROPERTIES_FILE" | \ + sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ grep "$1" | \ - sed "s/$1//g" | \ - sed "s/^[[:space:]]*=//g" | \ - sed "s/^[[:space:]]*\"\(.*\)\"[[:space:]]*$/\1/g" | \ + sed "s/$1//" | \ + sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ ) } From dec23439ad82718f786ea022b1f118f202687cc1 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 22:28:38 -0700 Subject: [PATCH 12/53] Only export variables if they exist --- bin/spark-class | 6 ++++-- bin/spark-submit | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 829f4cef2b27..700615d05120 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -75,8 +75,10 @@ case "$1" in # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS 'org.apache.spark.deploy.SparkSubmit') - OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \ - -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" + OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS" + if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then + OUR_JAVA_OPTS="$OUT_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" + fi OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} ;; diff --git a/bin/spark-submit b/bin/spark-submit index d48c48a5244a..3889ae79f6dc 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -68,9 +68,15 @@ if [ $DEPLOY_MODE == "client" ]; then parse_config "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$result" parse_config "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$result" - export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" - export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" - export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" + fi + if [ -n "$DRIVER_EXTRA_CLASSPATH" ]; then + export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" + fi + if [ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]; then + export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + fi else echo "Warning: properties file $PROPERTIES_FILE does not exist!" 
1>&2 fi From a4df3c4165ce4546742fbd0b9d92ea612973bb2e Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 22:47:57 -0700 Subject: [PATCH 13/53] Move parsing and escaping logic to utils.sh This commit also fixes a deadly typo. --- bin/spark-class | 43 ++++++++--------------------------- bin/spark-submit | 39 +++++++++++--------------------- bin/utils.sh | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 59 deletions(-) create mode 100755 bin/utils.sh diff --git a/bin/spark-class b/bin/spark-class index 700615d05120..a61d6a369cf8 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -30,6 +30,9 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" +# Load utility functions +. "$SPARK_HOME/bin/utils.sh" + . $FWDIR/bin/load-spark-env.sh if [ -z "$1" ]; then @@ -77,7 +80,7 @@ case "$1" in 'org.apache.spark.deploy.SparkSubmit') OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS" if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then - OUR_JAVA_OPTS="$OUT_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" + OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" fi OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} ;; @@ -103,11 +106,16 @@ fi # Set JAVA_OPTS to be able to load native libraries and to set heap size JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS" JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM" + # Load extra JAVA_OPTS from conf/java-opts, if it exists if [ -e "$FWDIR/conf/java-opts" ] ; then JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" fi -export JAVA_OPTS + +# Escape JAVA_OPTS properly to handle whitespace, double quotes and backslashes +# This exports the escaped java options into ESCAPED_JAVA_OPTS +escape_java_options "$JAVA_OPTS" + # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala! TOOLS_DIR="$FWDIR"/tools @@ -148,37 +156,6 @@ if $cygwin; then fi export CLASSPATH -# Properly escape java options, dealing with whitespace, double quotes and backslashes -# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS -escape_java_options() { - ESCAPED_JAVA_OPTS=() # return value - option_buffer="" # buffer for collecting parts of an option - opened_quotes=0 # whether we are expecting a closing double quotes - for word in $1; do - contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") - if [ -n "$contains_quote" ]; then - # Flip the bit - opened_quotes=$(((opened_quotes + 1) % 2)) - fi - if [[ $opened_quotes == 0 ]]; then - ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") - option_buffer="" - else - option_buffer="$option_buffer $word" - fi - done - # Something is wrong if we ended with open double quotes - if [[ $opened_quotes == 1 ]]; then - echo "Java options parse error! Expecting closing double quotes." 1>&2 - exit 1 - fi -} - -escape_java_options "$JAVA_OPTS" -for option in "${ESCAPED_JAVA_OPTS[@]}"; do - echo "$option" -done - if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then echo -n "Spark Command: " 1>&2 echo "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" 1>&2 diff --git a/bin/spark-submit b/bin/spark-submit index 3889ae79f6dc..6d7f139fdc3c 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -20,6 +20,14 @@ export SPARK_HOME="$(cd `dirname $0`/..; pwd)" ORIG_ARGS=("$@") +# Load utility functions +. 
"$SPARK_HOME/bin/utils.sh" + +# For client mode, the driver will be launched in the JVM that launches +# SparkSubmit, so we need to handle the class paths, java options, and +# memory pre-emptively in bash. Otherwise, it will be too late by the +# time the JVM has started. + while (($#)); do if [ "$1" = "--deploy-mode" ]; then DEPLOY_MODE=$2 @@ -40,34 +48,14 @@ done DEPLOY_MODE=${DEPLOY_MODE:-"client"} PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} -# For client mode, the driver will be launched in the JVM that launches -# SparkSubmit, so we need to handle the class paths, java options, and -# memory pre-emptively in bash. Otherwise, it will be too late by the -# time the JVM has started. - if [ $DEPLOY_MODE == "client" ]; then - # We parse the default properties file here, assuming each line is - # a key value pair delimited either by white space or "=" sign. All - # spark.driver.* configs must be processed now before it's too late. + # Parse the default properties file here for spark.driver.* configs if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE." 1>&2 - - # Parse the value of the given config according to the specifications outlined in - # http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader) - parse_config() { - result=$( \ - sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ - grep "$1" | \ - sed "s/$1//" | \ - sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ - sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ - ) - } - parse_config "spark.driver.memory"; DRIVER_MEMORY_CONF="$result" - parse_config "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$result" - parse_config "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$result" - parse_config "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$result" - + parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE" if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" fi @@ -80,7 +68,6 @@ if [ $DEPLOY_MODE == "client" ]; then else echo "Warning: properties file $PROPERTIES_FILE does not exist!" 1>&2 fi - # Favor command line memory over config memory DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"} if [ -n "$DRIVER_MEMORY" ]; then diff --git a/bin/utils.sh b/bin/utils.sh new file mode 100755 index 000000000000..613b72a49cf4 --- /dev/null +++ b/bin/utils.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Parse the value of a config from a java properties file according to the specifications in +# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). +# This accepts the name of the config and returns the value through JAVA_PROPERTY_VALUE. +parse_java_property() { + JAVA_PROPERTY_VALUE=$( \ + sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ + grep "$1" | \ + sed "s/$1//" | \ + sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ + sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ + ) + export JAVA_PROPERTY_VALUE +} + +# Properly escape java options, dealing with whitespace, double quotes and backslashes +# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS. +escape_java_options() { + ESCAPED_JAVA_OPTS=() # return value + option_buffer="" # buffer for collecting parts of an option + opened_quotes=0 # whether we are expecting a closing double quotes + for word in $1; do + contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") + if [ -n "$contains_quote" ]; then + # Flip the bit + opened_quotes=$(((opened_quotes + 1) % 2)) + fi + if [[ $opened_quotes == 0 ]]; then + ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") + option_buffer="" + else + option_buffer="$option_buffer $word" + fi + done + # Something is wrong if we ended with open double quotes + if [[ $opened_quotes == 1 ]]; then + echo "Java options parse error! Expecting closing double quotes." 1>&2 + exit 1 + fi + export ESCAPED_JAVA_OPTS +} + From de765c9813275b939741e1b78567b2443fab5f2d Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 23:22:05 -0700 Subject: [PATCH 14/53] Print spark-class command properly --- bin/spark-class | 5 ++++- bin/utils.sh | 28 +++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index a61d6a369cf8..5399ea1e2311 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -157,8 +157,11 @@ fi export CLASSPATH if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then + # Put quotes around system properties in case they contain spaces + # This exports the resulting list of java opts into QUOTED_JAVA_OPTS + quote_java_property "${ESCAPED_JAVA_OPTS[@]}" echo -n "Spark Command: " 1>&2 - echo "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" 1>&2 + echo "$RUNNER" -cp "$CLASSPATH" "${QUOTED_JAVA_OPTS[@]}" "$@" 1>&2 echo -e "========================================\n" 1>&2 fi diff --git a/bin/utils.sh b/bin/utils.sh index 613b72a49cf4..cf15f69308c8 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -16,9 +16,14 @@ # limitations under the License. # +# * ---------------------------------------------------- * +# | Utility functions for launching Spark applications | +# * ---------------------------------------------------- * + # Parse the value of a config from a java properties file according to the specifications in # http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). # This accepts the name of the config and returns the value through JAVA_PROPERTY_VALUE. +# This currently does not support multi-line configs. 
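+# Example (assuming a defaults file containing "spark.driver.memory 2g"):
+#   parse_java_property "spark.driver.memory"
+#   echo "$JAVA_PROPERTY_VALUE"    # prints: 2g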
parse_java_property() { JAVA_PROPERTY_VALUE=$( \ sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ @@ -30,22 +35,24 @@ parse_java_property() { export JAVA_PROPERTY_VALUE } -# Properly escape java options, dealing with whitespace, double quotes and backslashes -# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS. +# Properly escape java options, dealing with whitespace, double quotes and backslashes. +# This accepts a string and returns the escaped list through ESCAPED_JAVA_OPTS. escape_java_options() { ESCAPED_JAVA_OPTS=() # return value option_buffer="" # buffer for collecting parts of an option opened_quotes=0 # whether we are expecting a closing double quotes for word in $1; do contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") - if [ -n "$contains_quote" ]; then + if [[ -n "$contains_quote" ]]; then # Flip the bit opened_quotes=$(((opened_quotes + 1) % 2)) fi if [[ $opened_quotes == 0 ]]; then + # Remove all non-escaped quotes around the value ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") option_buffer="" else + # We are expecting a closing double quote, so keep buffering option_buffer="$option_buffer $word" fi done @@ -57,3 +64,18 @@ escape_java_options() { export ESCAPED_JAVA_OPTS } +# Put double quotes around each of the given java options that is a system property. +# This accepts a list and returns the quoted list through QUOTED_JAVA_OPTS +quote_java_property() { + QUOTED_JAVA_OPTS=() + for opt in "$@"; do + is_system_property=$(echo "$opt" | grep -e "^-D") + if [[ -n "$is_system_property" ]]; then + QUOTED_JAVA_OPTS+=(\"$opt\") + else + QUOTED_JAVA_OPTS+=("$opt") + fi + done + export QUOTED_JAVA_OPTS +} + From 8e552b733d52ada89dd7c0e8692fcca87fc00d26 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 23:25:36 -0700 Subject: [PATCH 15/53] Include an example of spark.*.extraJavaOptions Right now it's not super obvious how to specify multiple java options, especially ones with white spaces. --- conf/spark-defaults.conf.template | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template index 2779342769c1..974ece201495 100644 --- a/conf/spark-defaults.conf.template +++ b/conf/spark-defaults.conf.template @@ -2,7 +2,8 @@ # This is useful for setting default environmental settings. # Example: -# spark.master spark://master:7077 -# spark.eventLog.enabled true -# spark.eventLog.dir hdfs://namenode:8021/directory -# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dmy.property="one two three" From c854859be8a604ac04c74488e7729423c47acd37 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 23:38:39 -0700 Subject: [PATCH 16/53] Add small comment --- bin/spark-submit | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/spark-submit b/bin/spark-submit index 6d7f139fdc3c..be8c464599e0 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -52,6 +52,7 @@ if [ $DEPLOY_MODE == "client" ]; then # Parse the default properties file here for spark.driver.* configs if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE." 
1>&2 + # This exports the value of the given key into JAVA_PROPERTY_VALUE parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" From 1cdc6b15ff375bfb0ce3fe3f6b6c434dc4e30947 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 12:33:55 -0700 Subject: [PATCH 17/53] Fix bug: escape escaped double quotes properly The previous code used to ignore all closing quotes if the same token also has an escaped double quote. For example, in -Dkey="I am the \"man\"" the last token contains both escaped quotes and valid quotes. This used to be interpreted as a token that doesn't have a closing quote when it actually does. This is fixed in this commit. --- bin/utils.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/utils.sh b/bin/utils.sh index cf15f69308c8..6aa730760c75 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -42,7 +42,7 @@ escape_java_options() { option_buffer="" # buffer for collecting parts of an option opened_quotes=0 # whether we are expecting a closing double quotes for word in $1; do - contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") + contains_quote=$(echo "$word" | sed "s/\\\\\"//g" | grep "\"") if [[ -n "$contains_quote" ]]; then # Flip the bit opened_quotes=$(((opened_quotes + 1) % 2)) From 45a1eb996773fa1828d1d489cbc451f2033845e0 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 13:28:56 -0700 Subject: [PATCH 18/53] Fix bug: escape escaped backslashes and quotes properly... This is so that the way this is parsed and the way Java parses its java opts is consistent. --- bin/utils.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bin/utils.sh b/bin/utils.sh index 6aa730760c75..e1624fe07752 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -49,7 +49,12 @@ escape_java_options() { fi if [[ $opened_quotes == 0 ]]; then # Remove all non-escaped quotes around the value - ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") + ESCAPED_JAVA_OPTS+=("$( + echo "$option_buffer $word" | \ + sed "s/^[[:space:]]*//" | \ + sed "s/\([^\\]\)\"/\1/g" | \ + sed "s/\\\\\([\\\"]\)/\1/g" + )") option_buffer="" else # We are expecting a closing double quote, so keep buffering From aabfc7e1da8897b266020da6c480cbe7d774bc99 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 14:24:57 -0700 Subject: [PATCH 19/53] escape -> split (minor) --- bin/spark-class | 10 +++++----- bin/utils.sh | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 5399ea1e2311..ce5bebe5929e 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -112,9 +112,9 @@ if [ -e "$FWDIR/conf/java-opts" ] ; then JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" fi -# Escape JAVA_OPTS properly to handle whitespace, double quotes and backslashes -# This exports the escaped java options into ESCAPED_JAVA_OPTS -escape_java_options "$JAVA_OPTS" +# Split JAVA_OPTS properly to handle whitespace, double quotes and backslashes +# This exports the split java options into SPLIT_JAVA_OPTS +split_java_options "$JAVA_OPTS" # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala! 
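# (For intuition: given the single string  -Dnumbers="one two" -verbose:gc
# the resulting SPLIT_JAVA_OPTS array should hold two entries,
# `-Dnumbers=one two` and `-verbose:gc`, i.e. roughly the tokens the JVM
# would itself have produced from a real command line.)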
@@ -159,10 +159,10 @@ export CLASSPATH if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then # Put quotes around system properties in case they contain spaces # This exports the resulting list of java opts into QUOTED_JAVA_OPTS - quote_java_property "${ESCAPED_JAVA_OPTS[@]}" + quote_java_property "${SPLIT_JAVA_OPTS[@]}" echo -n "Spark Command: " 1>&2 echo "$RUNNER" -cp "$CLASSPATH" "${QUOTED_JAVA_OPTS[@]}" "$@" 1>&2 echo -e "========================================\n" 1>&2 fi -exec "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" +exec "$RUNNER" -cp "$CLASSPATH" "${SPLIT_JAVA_OPTS[@]}" "$@" diff --git a/bin/utils.sh b/bin/utils.sh index e1624fe07752..431813072754 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -35,12 +35,12 @@ parse_java_property() { export JAVA_PROPERTY_VALUE } -# Properly escape java options, dealing with whitespace, double quotes and backslashes. -# This accepts a string and returns the escaped list through ESCAPED_JAVA_OPTS. -escape_java_options() { - ESCAPED_JAVA_OPTS=() # return value - option_buffer="" # buffer for collecting parts of an option - opened_quotes=0 # whether we are expecting a closing double quotes +# Properly split java options, dealing with whitespace, double quotes and backslashes. +# This accepts a string and returns the resulting list through SPLIT_JAVA_OPTS. +split_java_options() { + SPLIT_JAVA_OPTS=() # return value + option_buffer="" # buffer for collecting parts of an option + opened_quotes=0 # whether we are expecting a closing double quotes for word in $1; do contains_quote=$(echo "$word" | sed "s/\\\\\"//g" | grep "\"") if [[ -n "$contains_quote" ]]; then @@ -49,7 +49,7 @@ escape_java_options() { fi if [[ $opened_quotes == 0 ]]; then # Remove all non-escaped quotes around the value - ESCAPED_JAVA_OPTS+=("$( + SPLIT_JAVA_OPTS+=("$( echo "$option_buffer $word" | \ sed "s/^[[:space:]]*//" | \ sed "s/\([^\\]\)\"/\1/g" | \ @@ -66,7 +66,7 @@ escape_java_options() { echo "Java options parse error! Expecting closing double quotes." 1>&2 exit 1 fi - export ESCAPED_JAVA_OPTS + export SPLIT_JAVA_OPTS } # Put double quotes around each of the given java options that is a system property. From a992ae2ba7067cf76fba0e3ef192b275eee40b57 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 16:51:16 -0700 Subject: [PATCH 20/53] Escape spark.*.extraJavaOptions correctly We previously never dealt with this correctly, in that we evaluated all backslashes twice, once when passing spark.*.extraJavaOptions into SparkSubmit, and another time when calling Utils.splitCommandString. This means we need to pass the raw values of these configs directly to the JVM without evaluating the backslashes when launching SparkSubmit. The way we do this is through a few custom environment variables. As of this commit, the user should follow the format outlined in spark-defaults.conf.template for spark.*.extraJavaOptions, and the expected java options (with quotes, whitespaces and backslashes and everything) will be propagated to the driver or the executors correctly. --- bin/spark-submit | 81 ++++++++++++------- .../spark/deploy/SparkSubmitArguments.scala | 9 +++ 2 files changed, 63 insertions(+), 27 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index be8c464599e0..1a815becae9b 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -23,11 +23,6 @@ ORIG_ARGS=("$@") # Load utility functions . 
"$SPARK_HOME/bin/utils.sh" -# For client mode, the driver will be launched in the JVM that launches -# SparkSubmit, so we need to handle the class paths, java options, and -# memory pre-emptively in bash. Otherwise, it will be too late by the -# time the JVM has started. - while (($#)); do if [ "$1" = "--deploy-mode" ]; then DEPLOY_MODE=$2 @@ -46,32 +41,64 @@ while (($#)); do done DEPLOY_MODE=${DEPLOY_MODE:-"client"} -PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} +DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" +PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} + +unset DRIVER_EXTRA_JAVA_OPTIONS +unset EXECUTOR_EXTRA_JAVA_OPTIONS + +# A few Spark configs must be parsed early on before launching the JVM: +# +# [spark.driver.extra*] +# These configs encode java options, class paths, and library paths +# needed to launch the JVM if we are running Spark in client mode +# +# [spark.*.extraJavaOptions] +# The escaped characters in these configs must be preserved for +# splitting the arguments in Java later. For these configs, we +# export the raw values as environment variables. +# +if [[ -f "$PROPERTIES_FILE" ]]; then + echo "Using properties file $PROPERTIES_FILE." 1>&2 + # This exports the value of the given key into JAVA_PROPERTY_VALUE + parse_java_property "spark.driver.memory" + DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraLibraryPath" + DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraClassPath" + DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraJavaOptions" + DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.executor.extraJavaOptions" + EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" + if [[ -n "DRIVER_EXTRA_JAVA_OPTS" ]]; then + export DRIVER_EXTRA_JAVA_OPTS + fi + if [[ -n "EXECUTOR_EXTRA_JAVA_OPTS" ]]; then + export EXECUTOR_EXTRA_JAVA_OPTS + fi +elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then + echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2 +fi + +# For client mode, the driver will be launched in the JVM that launches +# SparkSubmit, so we need to handle the class paths, java options, and +# memory pre-emptively in bash. Otherwise, it will be too late by the +# time the JVM has started. -if [ $DEPLOY_MODE == "client" ]; then - # Parse the default properties file here for spark.driver.* configs - if [ -f "$PROPERTIES_FILE" ]; then - echo "Using properties file $PROPERTIES_FILE." 1>&2 - # This exports the value of the given key into JAVA_PROPERTY_VALUE - parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE" - if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then - export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" - fi - if [ -n "$DRIVER_EXTRA_CLASSPATH" ]; then - export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" - fi - if [ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]; then - export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" - fi - else - echo "Warning: properties file $PROPERTIES_FILE does not exist!" 
1>&2 +if [[ $DEPLOY_MODE == "client" ]]; then + if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" + fi + if [[ -n "$DRIVER_EXTRA_CLASSPATH" ]]; then + export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" + fi + if [[ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]]; then + export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" fi # Favor command line memory over config memory DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"} - if [ -n "$DRIVER_MEMORY" ]; then + if [[ -n "$DRIVER_MEMORY" ]]; then export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY fi fi diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 087dd4d633db..614089272c1e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -76,6 +76,15 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) { } } } + // For spark.*.extraJavaOptions, we cannot rely on the Java properties loader because it + // un-escapes certain characters (" and \) needed to split the string into java options. + // For these configs, use the equivalent environment variables instead. + sys.env.get("DRIVER_EXTRA_JAVA_OPTS").foreach { opts => + defaultProperties("spark.driver.extraJavaOptions") = opts + } + sys.env.get("EXECUTOR_EXTRA_JAVA_OPTS").foreach { opts => + defaultProperties("spark.executor.extraJavaOptions") = opts + } defaultProperties } From c7b99267c577195882c965029884941b79cc8ed0 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 16:51:29 -0700 Subject: [PATCH 21/53] Minor changes to spark-defaults.conf.template ... to highlight our new-found ability to deal with special values. --- conf/spark-defaults.conf.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template index 974ece201495..ad7273d830c1 100644 --- a/conf/spark-defaults.conf.template +++ b/conf/spark-defaults.conf.template @@ -6,4 +6,4 @@ # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer -# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dmy.property="one two three" +# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dnumbers="one \"two\" three" From e793e5f56c5c62d94fe0f2ac3d8aefc1d0b1573e Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 18:38:17 -0700 Subject: [PATCH 22/53] Handle multi-line arguments --- bin/utils.sh | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/bin/utils.sh b/bin/utils.sh index 431813072754..5280b9c40e92 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -22,12 +22,33 @@ # Parse the value of a config from a java properties file according to the specifications in # http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). -# This accepts the name of the config and returns the value through JAVA_PROPERTY_VALUE. -# This currently does not support multi-line configs. +# This accepts the name of the config as an argument, and expects the path of the property +# file to be found in PROPERTIES_FILE. The value is returned through JAVA_PROPERTY_VALUE. 
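+# For example, a hypothetical multi-line value such as
+#   spark.driver.extraJavaOptions -Done=1 \
+#                                 -Dtwo=2
+# is folded back into the single value "-Done=1 -Dtwo=2" before the key
+# and delimiter are stripped.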
parse_java_property() { + JAVA_PROPERTY_VALUE="" # return value + config_buffer="" # buffer for collecting parts of a config value + multi_line=0 # whether this config is spanning multiple lines + while read -r line; do + # Strip leading and trailing whitespace + line=$(echo "$line" | sed "s/^[[:space:]]\(.*\)[[:space:]]*$/\1/") + contains_config=$(echo "$line" | grep -e "^$1") + if [[ -n "$contains_config" || "$multi_line" == 1 ]]; then + has_more_lines=$(echo "$line" | grep -e "\\\\$") + if [[ -n "$has_more_lines" ]]; then + # Strip trailing backslash + line=$(echo "$line" | sed "s/\\\\$//") + config_buffer="$config_buffer $line" + multi_line=1 + else + JAVA_PROPERTY_VALUE="$config_buffer $line" + break + fi + fi + done < "$PROPERTIES_FILE" + + # Actually extract the value of the config JAVA_PROPERTY_VALUE=$( \ - sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ - grep "$1" | \ + echo "$JAVA_PROPERTY_VALUE" | \ sed "s/$1//" | \ sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ @@ -63,7 +84,8 @@ split_java_options() { done # Something is wrong if we ended with open double quotes if [[ $opened_quotes == 1 ]]; then - echo "Java options parse error! Expecting closing double quotes." 1>&2 + echo -e "Java options parse error! Expecting closing double quotes:" 1>&2 + echo -e " ${SPLIT_JAVA_OPTS[@]}" 1>&2 exit 1 fi export SPLIT_JAVA_OPTS From c2273fcad9a4f3e5cdfba56eb88e483100728a8a Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 18:53:20 -0700 Subject: [PATCH 23/53] Fix typo (minor) --- bin/spark-submit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/spark-submit b/bin/spark-submit index 1a815becae9b..0eab865a5baf 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -83,7 +83,7 @@ fi # For client mode, the driver will be launched in the JVM that launches # SparkSubmit, so we need to handle the class paths, java options, and -# memory pre-emptively in bash. Otherwise, it will be too late by the +# memory preemptively in bash. Otherwise, it will be too late by the # time the JVM has started. if [[ $DEPLOY_MODE == "client" ]]; then From b3c4cd562f86745d2d75692b8681eb79c62547cd Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 8 Aug 2014 11:47:07 -0700 Subject: [PATCH 24/53] Fix bug: count the number of quotes instead of detecting presence A java option of -Dfoo="bar" would fail this previously, because there are two quotes around bar, but we only detected open quotes by the presence of the double quote character. --- bin/utils.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/utils.sh b/bin/utils.sh index 5280b9c40e92..6080f3933e21 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -63,8 +63,8 @@ split_java_options() { option_buffer="" # buffer for collecting parts of an option opened_quotes=0 # whether we are expecting a closing double quotes for word in $1; do - contains_quote=$(echo "$word" | sed "s/\\\\\"//g" | grep "\"") - if [[ -n "$contains_quote" ]]; then + num_quotes=$(echo "$word" | sed "s/\\\\\"//g" | grep -o "\"" | grep -c .) 
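+    # (grep -o prints each remaining quote on its own line, and `grep -c .`
+    # counts those lines, i.e. the number of unescaped quotes in $word)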
+ if [[ $((num_quotes % 2)) == 1 ]]; then # Flip the bit opened_quotes=$(((opened_quotes + 1) % 2)) fi From 4ae24c3e1fb49f8f38e3fa4426dcc9e00456c56e Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 8 Aug 2014 16:41:06 -0700 Subject: [PATCH 25/53] Fix bug: escape properly in quote_java_property --- bin/utils.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/utils.sh b/bin/utils.sh index 6080f3933e21..4b3c09fbe253 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -98,7 +98,7 @@ quote_java_property() { for opt in "$@"; do is_system_property=$(echo "$opt" | grep -e "^-D") if [[ -n "$is_system_property" ]]; then - QUOTED_JAVA_OPTS+=(\"$opt\") + QUOTED_JAVA_OPTS+=("\"$opt\"") else QUOTED_JAVA_OPTS+=("$opt") fi From 8d26a5c4c7802c8b712614e17c23e7ab4c2ea2d7 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 8 Aug 2014 16:55:23 -0700 Subject: [PATCH 26/53] Add tests for bash/utils.sh --- bin/test.conf | 66 ++++++++++++++++++ bin/test.sh | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 250 insertions(+) create mode 100644 bin/test.conf create mode 100755 bin/test.sh diff --git a/bin/test.conf b/bin/test.conf new file mode 100644 index 000000000000..cb82e2e684dd --- /dev/null +++ b/bin/test.conf @@ -0,0 +1,66 @@ +# -------------------------------------------------------------------------------------------- +# Spark properties file for testing +# +# The configs are separated into three categories, one for each delimiter that Java supports: +# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). +# The values of the configs are assumed to be identical across the categories. Changes in +# this file must be synced with "bin/test.sh" +# -------------------------------------------------------------------------------------------- + +# Space delimiter +spark.space.1 -Dstraw=berry +spark.space.2 -Dstraw="berry" +spark.space.3 -Dstraw="berry again" +spark.space.4 -Dstraw="berry \"quote" +spark.space.5 -Dstraw="berry \\backslash" +spark.space.6 -Dstraw="berry \"quotes\" and \\backslashes\\ " +spark.space.7 -Dstraw=berry -Dblue=berry -Dblack=berry +spark.space.8 -Dstraw="berry space" -Dblue="berry" -Dblack=berry +spark.space.9 -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " +spark.space.10 -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " +spark.space.11 \ + -Dstraw="berry space" \ + -Dblue="berry \"quotes\"" \ + -Dblack="berry \\backslashes\\ " \ + -Dcherry=berry +spark.space.12 -Dstraw="berry space" -Dblue="berry \"quotes\"" \ + -Dblack="berry \\backslashes\\ " -Dcherry=berry + +# Equal sign delimiter +spark.equal.1=-Dstraw=berry +spark.equal.2=-Dstraw="berry" +spark.equal.3=-Dstraw="berry again" +spark.equal.4=-Dstraw="berry \"quote" +spark.equal.5=-Dstraw="berry \\backslash" +spark.equal.6=-Dstraw="berry \"quotes\" and \\backslashes\\ " +spark.equal.7=-Dstraw=berry -Dblue=berry -Dblack=berry +spark.equal.8=-Dstraw="berry space" -Dblue="berry" -Dblack=berry +spark.equal.9=-Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " +spark.equal.10 = -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " +spark.equal.11=\ + -Dstraw="berry space" \ + -Dblue="berry \"quotes\"" \ + -Dblack="berry \\backslashes\\ " \ + -Dcherry=berry +spark.equal.12=-Dstraw="berry space" -Dblue="berry \"quotes\"" \ + -Dblack="berry \\backslashes\\ " -Dcherry=berry + +# Colon delimiter +spark.colon.1:-Dstraw=berry 
+spark.colon.2:-Dstraw="berry" +spark.colon.3:-Dstraw="berry again" +spark.colon.4:-Dstraw="berry \"quote" +spark.colon.5:-Dstraw="berry \\backslash" +spark.colon.6:-Dstraw="berry \"quotes\" and \\backslashes\\ " +spark.colon.7:-Dstraw=berry -Dblue=berry -Dblack=berry +spark.colon.8:-Dstraw="berry space" -Dblue="berry" -Dblack=berry +spark.colon.9:-Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " +spark.colon.10 : -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " +spark.colon.11:\ + -Dstraw="berry space" \ + -Dblue="berry \"quotes\"" \ + -Dblack="berry \\backslashes\\ " \ + -Dcherry=berry +spark.colon.12:-Dstraw="berry space" -Dblue="berry \"quotes\"" \ + -Dblack="berry \\backslashes\\ " -Dcherry=berry + diff --git a/bin/test.sh b/bin/test.sh new file mode 100755 index 000000000000..6de1620f3890 --- /dev/null +++ b/bin/test.sh @@ -0,0 +1,184 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SPARK_HOME="$(cd `dirname $0`/..; pwd)" +PROPERTIES_FILE="$SPARK_HOME/bin/test.conf" + +# Load utility functions +. "$SPARK_HOME/bin/utils.sh" + +tests_failed=0 + +# Test parse_java_property. This takes in three parameters, the name of the config, +# the expected value, and whether or not to ignore whitespace (e.g. for multi-line). +test_parse_java_property() { + key="$1" + expected_value="$2" + ignore_whitespace="$3" + parse_java_property "$key" + actual_value="$JAVA_PROPERTY_VALUE" + echo " $key -> $actual_value" + # Ignore whitespace for multi-line arguments + if [[ -n "$ignore_whitespace" ]]; then + expected_value=$(echo "$expected_value" | sed "s/[[:space:]]//g") + actual_value=$(echo "$actual_value" | sed "s/[[:space:]]//g") + fi + if [[ "$actual_value" != "$expected_value" ]]; then + echo " XXXXX TEST FAILED XXXXX" + echo " expected: $expected_value" + echo " actual: $actual_value" + tests_failed=1 + fi +} + +# Test split_java_options. This takes in three or more parameters, the name of the config, +# the expected number of java options, and values of the java options themselves. 
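+# For example, given the entry spark.space.7 defined in bin/test.conf above, a passing
+# invocation would be:
+#   test_split_java_options "spark.space.7" 3 "-Dstraw=berry" "-Dblue=berry" "-Dblack=berry"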
+test_split_java_options() { + key="$1" + expected_size="$2" + expected_values=("${@:3}") + parse_java_property "$key" + echo " $JAVA_PROPERTY_VALUE" + split_java_options "$JAVA_PROPERTY_VALUE" + if [[ "$expected_size" != "${#SPLIT_JAVA_OPTS[@]}" ]]; then + echo " XXXXX TEST FAILED XXXXX" + echo " expected size: $expected_size" + echo " actual size: ${#SPLIT_JAVA_OPTS[@]}" + fi + for i in $(seq 0 $((expected_size - 1))); do + expected_value="${expected_values[$i]}" + actual_value="${SPLIT_JAVA_OPTS[$i]}" + echo " -> $actual_value" + if [[ "$expected_value" != "$actual_value" ]]; then + echo " XXXXX TEST FAILED (key $key) XXXXX" + echo " expected value: $expected_value" + echo " actual value: $actual_value" + tests_failed=1 + break + fi + done +} + +# Test split_java_options. This takes in three or more parameters, the name of the config, +# the expected number of java options, and values of the java options themselves. +test_quote_java_property() { + key="$1" + expected_size="$2" + expected_values=("${@:3}") + parse_java_property "$key" + split_java_options "$JAVA_PROPERTY_VALUE" + quote_java_property "${SPLIT_JAVA_OPTS[@]}" + echo " $JAVA_PROPERTY_VALUE" + for i in $(seq 0 $((expected_size - 1))); do + expected_value="${expected_values[$i]}" + actual_value="${QUOTED_JAVA_OPTS[$i]}" + echo " -> $actual_value" + if [[ "$expected_value" != "$actual_value" ]]; then + echo " XXXXX TEST FAILED (key $key) XXXXX" + echo " expected value: $expected_value" + echo " actual value: $actual_value" + tests_failed=1 + break + fi + done +} + +# Test parse_java_property. This should read the literal value as written in the conf file. +echo "--- Testing parse_java_property ---" +delimiters=("space" "equal" "colon") +test_parse_java_property "does.not.exist" "" +for delimiter in "${delimiters[@]}"; do + test_parse_java_property "spark.$delimiter.1" "-Dstraw=berry" + test_parse_java_property "spark.$delimiter.2" "-Dstraw=\"berry\"" + test_parse_java_property "spark.$delimiter.3" "-Dstraw=\"berry again\"" + test_parse_java_property "spark.$delimiter.4" "-Dstraw=\"berry \\\"quote\"" + test_parse_java_property "spark.$delimiter.5" "-Dstraw=\"berry \\\\backslash\"" + test_parse_java_property "spark.$delimiter.6" "-Dstraw=\"berry \\\"quotes\\\" and \\\\backslashes\\\\ \"" + test_parse_java_property "spark.$delimiter.7" "-Dstraw=berry -Dblue=berry -Dblack=berry" + test_parse_java_property "spark.$delimiter.8" "-Dstraw=\"berry space\" -Dblue=\"berry\" -Dblack=berry" + test_parse_java_property "spark.$delimiter.9" \ + "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" + test_parse_java_property "spark.$delimiter.10" \ + "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" IGNORE_WHITESPACE + test_parse_java_property "spark.$delimiter.11" \ + "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \" -Dcherry=berry" IGNORE_WHITESPACE + test_parse_java_property "spark.$delimiter.12" \ + "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \" -Dcherry=berry" IGNORE_WHITESPACE +done +echo + +# Test split_java_options. Note that this relies on parse_java_property to work correctly. +if [[ "$tests_failed" == 1 ]]; then + echo "* WARNING: Tests for parse_java_property failed!" 
+ echo -e "This should also fail tests for split_java_options\n" +fi +echo "--- Testing split_java_options ---" +test_split_java_options "spark.space.1" 1 "-Dstraw=berry" +test_split_java_options "spark.space.2" 1 "-Dstraw=berry" +test_split_java_options "spark.space.3" 1 "-Dstraw=berry again" +test_split_java_options "spark.space.4" 1 "-Dstraw=berry \"quote" +test_split_java_options "spark.space.5" 1 "-Dstraw=berry \\backslash" +test_split_java_options "spark.space.6" 1 "-Dstraw=berry \"quotes\" and \\backslashes\\ " +test_split_java_options "spark.space.7" 3 "-Dstraw=berry" "-Dblue=berry" "-Dblack=berry" +test_split_java_options "spark.space.8" 3 "-Dstraw=berry space" "-Dblue=berry" "-Dblack=berry" +test_split_java_options "spark.space.9" 3 \ + "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " +test_split_java_options "spark.space.10" 3 \ + "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " +test_split_java_options "spark.space.11" 4 \ + "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " "-Dcherry=berry" +test_split_java_options "spark.space.12" 4 \ + "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " "-Dcherry=berry" +echo + +# Test quote_java_property. Note that this relies on split_java_options to work correctly. +if [[ "$tests_failed" == 1 ]]; then + echo "* WARNING: Tests for split_java_options failed!" + echo -e "This should also fail tests for quote_java_property\n" +fi +echo "--- Testing quote_java_property ---" +test_quote_java_property "spark.space.1" 1 "\"-Dstraw=berry\"" +test_quote_java_property "spark.space.2" 1 "\"-Dstraw=berry\"" +test_quote_java_property "spark.space.3" 1 "\"-Dstraw=berry again\"" +test_quote_java_property "spark.space.4" 1 "\"-Dstraw=berry \"quote\"" +test_quote_java_property "spark.space.5" 1 "\"-Dstraw=berry \\backslash\"" +test_quote_java_property "spark.space.6" 1 "\"-Dstraw=berry \"quotes\" and \\backslashes\\ \"" +test_quote_java_property "spark.space.7" 3 "\"-Dstraw=berry\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" +test_quote_java_property "spark.space.8" 3 "\"-Dstraw=berry space\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" +test_quote_java_property "spark.space.9" 3 \ + "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" +test_quote_java_property "spark.space.10" 3 \ + "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" +test_quote_java_property "spark.space.11" 4 \ + "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" "\"-Dcherry=berry\"" +test_quote_java_property "spark.space.12" 4 \ + "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" "\"-Dcherry=berry\"" +echo + +# Final test result +if [[ "$tests_failed" == 0 ]]; then + echo "**********************" + echo " TESTS PASS " + echo "**********************" +else + echo "XXXXXXXXXXXXXXXXXXXXXXXX" + echo "XXXXX TESTS FAILED XXXXX" + echo "XXXXXXXXXXXXXXXXXXXXXXXX" + exit 1 +fi + From 2732ac08d971f6c281b5a4c311ccc51a94fe505b Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 8 Aug 2014 19:14:31 -0700 Subject: [PATCH 27/53] Integrate BASH tests into dev/run-tests + log error properly --- .rat-excludes | 1 + bin/{test.sh => run-tests} | 125 ++++++++++++++++++++++++++----------- dev/run-tests | 6 ++ 3 files changed, 96 insertions(+), 36 deletions(-) rename bin/{test.sh => run-tests} (70%) diff --git a/.rat-excludes 
b/.rat-excludes index bccb043c2bb5..c20226f233a9 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -29,6 +29,7 @@ sorttable.js .*json .*data .*log +.*conf cloudpickle.py join.py SparkExprTyper.scala diff --git a/bin/test.sh b/bin/run-tests similarity index 70% rename from bin/test.sh rename to bin/run-tests index 6de1620f3890..58b111244d16 100755 --- a/bin/test.sh +++ b/bin/run-tests @@ -16,6 +16,14 @@ # limitations under the License. # +# This file tests the various functionalities in bin/utils.sh +# +# By default, this prints only the relevant error output at the end if tests fail. +# For debugging, the user can set SPARK_TESTING_VERBOSE to print more information +# while tests are still running. +# +# This script returns an exit code of 1 on test failure. + SPARK_HOME="$(cd `dirname $0`/..; pwd)" PROPERTIES_FILE="$SPARK_HOME/bin/test.conf" @@ -23,6 +31,46 @@ PROPERTIES_FILE="$SPARK_HOME/bin/test.conf" . "$SPARK_HOME/bin/utils.sh" tests_failed=0 +this_test_failed=0 +error_output_buffer="" +temp_output_buffer="" + +# Echo only if the verbose flag is set +verbose_echo() { + if [[ -n "$SPARK_TESTING_VERBOSE" ]]; then + echo -e "$1" + fi +} + +# Collect error output for echoing at the end if tests fail +# This also echoes the given string if the verbose flag is set +log_error() { + verbose_echo "$1" + if [[ -n "$error_output_buffer" ]]; then + error_output_buffer=$(echo -e "$error_output_buffer\n$1") + else + error_output_buffer="$1" + fi +} + +# Collect temporary output for logging +collect_temp_output() { + if [[ -n "$temp_output_buffer" ]]; then + temp_output_buffer=$(echo -e "$temp_output_buffer\n$1") + else + temp_output_buffer="$1" + fi +} + +# Print the result of an individual test +echo_test_result() { + if [[ "$this_test_failed" == 1 ]]; then + log_error "$temp_output_buffer" + tests_failed=1 + else + verbose_echo "$temp_output_buffer" + fi +} # Test parse_java_property. This takes in three parameters, the name of the config, # the expected value, and whether or not to ignore whitespace (e.g. for multi-line). @@ -30,20 +78,23 @@ test_parse_java_property() { key="$1" expected_value="$2" ignore_whitespace="$3" + temp_output_buffer="" + this_test_failed=0 parse_java_property "$key" actual_value="$JAVA_PROPERTY_VALUE" - echo " $key -> $actual_value" + collect_temp_output " $key -> $actual_value" # Ignore whitespace for multi-line arguments if [[ -n "$ignore_whitespace" ]]; then expected_value=$(echo "$expected_value" | sed "s/[[:space:]]//g") actual_value=$(echo "$actual_value" | sed "s/[[:space:]]//g") fi if [[ "$actual_value" != "$expected_value" ]]; then - echo " XXXXX TEST FAILED XXXXX" - echo " expected: $expected_value" - echo " actual: $actual_value" - tests_failed=1 + collect_temp_output " XXXXX TEST FAILED XXXXX" + collect_temp_output " expected: $expected_value" + collect_temp_output " actual: $actual_value" + this_test_failed=1 fi + echo_test_result } # Test split_java_options. 
This takes in three or more parameters, the name of the config, @@ -52,26 +103,30 @@ test_split_java_options() { key="$1" expected_size="$2" expected_values=("${@:3}") + temp_output_buffer="" + this_test_failed=0 parse_java_property "$key" - echo " $JAVA_PROPERTY_VALUE" + collect_temp_output " $JAVA_PROPERTY_VALUE" split_java_options "$JAVA_PROPERTY_VALUE" if [[ "$expected_size" != "${#SPLIT_JAVA_OPTS[@]}" ]]; then - echo " XXXXX TEST FAILED XXXXX" - echo " expected size: $expected_size" - echo " actual size: ${#SPLIT_JAVA_OPTS[@]}" + collect_temp_output " XXXXX TEST FAILED XXXXX" + collect_temp_output " expected size: $expected_size" + collect_temp_output " actual size: ${#SPLIT_JAVA_OPTS[@]}" + this_test_failed=1 fi for i in $(seq 0 $((expected_size - 1))); do expected_value="${expected_values[$i]}" actual_value="${SPLIT_JAVA_OPTS[$i]}" - echo " -> $actual_value" + collect_temp_output " -> $actual_value" if [[ "$expected_value" != "$actual_value" ]]; then - echo " XXXXX TEST FAILED (key $key) XXXXX" - echo " expected value: $expected_value" - echo " actual value: $actual_value" - tests_failed=1 + collect_temp_output " XXXXX TEST FAILED (key $key) XXXXX" + collect_temp_output " expected value: $expected_value" + collect_temp_output " actual value: $actual_value" + this_test_failed=1 break fi done + echo_test_result } # Test split_java_options. This takes in three or more parameters, the name of the config, @@ -80,26 +135,29 @@ test_quote_java_property() { key="$1" expected_size="$2" expected_values=("${@:3}") + temp_output_buffer="" + this_test_failed=0 parse_java_property "$key" split_java_options "$JAVA_PROPERTY_VALUE" quote_java_property "${SPLIT_JAVA_OPTS[@]}" - echo " $JAVA_PROPERTY_VALUE" + collect_temp_output " $JAVA_PROPERTY_VALUE" for i in $(seq 0 $((expected_size - 1))); do expected_value="${expected_values[$i]}" actual_value="${QUOTED_JAVA_OPTS[$i]}" - echo " -> $actual_value" + collect_temp_output " -> $actual_value" if [[ "$expected_value" != "$actual_value" ]]; then - echo " XXXXX TEST FAILED (key $key) XXXXX" - echo " expected value: $expected_value" - echo " actual value: $actual_value" - tests_failed=1 + collect_temp_output " XXXXX TEST FAILED (key $key) XXXXX" + collect_temp_output " expected value: $expected_value" + collect_temp_output " actual value: $actual_value" + this_test_failed=1 break fi done + echo_test_result } # Test parse_java_property. This should read the literal value as written in the conf file. -echo "--- Testing parse_java_property ---" +log_error "--- Testing parse_java_property ---" delimiters=("space" "equal" "colon") test_parse_java_property "does.not.exist" "" for delimiter in "${delimiters[@]}"; do @@ -120,14 +178,13 @@ for delimiter in "${delimiters[@]}"; do test_parse_java_property "spark.$delimiter.12" \ "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \" -Dcherry=berry" IGNORE_WHITESPACE done -echo +log_error # Test split_java_options. Note that this relies on parse_java_property to work correctly. +log_error "--- Testing split_java_options ---" if [[ "$tests_failed" == 1 ]]; then - echo "* WARNING: Tests for parse_java_property failed!" - echo -e "This should also fail tests for split_java_options\n" + log_error "* WARNING: Tests for parse_java_property failed! 
This should also fail tests for split_java_options" fi -echo "--- Testing split_java_options ---" test_split_java_options "spark.space.1" 1 "-Dstraw=berry" test_split_java_options "spark.space.2" 1 "-Dstraw=berry" test_split_java_options "spark.space.3" 1 "-Dstraw=berry again" @@ -144,14 +201,13 @@ test_split_java_options "spark.space.11" 4 \ "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " "-Dcherry=berry" test_split_java_options "spark.space.12" 4 \ "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " "-Dcherry=berry" -echo +log_error # Test quote_java_property. Note that this relies on split_java_options to work correctly. +log_error "--- Testing quote_java_property ---" if [[ "$tests_failed" == 1 ]]; then - echo "* WARNING: Tests for split_java_options failed!" - echo -e "This should also fail tests for quote_java_property\n" + log_error "* WARNING: Tests for split_java_options failed! This should also fail tests for quote_java_property" fi -echo "--- Testing quote_java_property ---" test_quote_java_property "spark.space.1" 1 "\"-Dstraw=berry\"" test_quote_java_property "spark.space.2" 1 "\"-Dstraw=berry\"" test_quote_java_property "spark.space.3" 1 "\"-Dstraw=berry again\"" @@ -168,17 +224,14 @@ test_quote_java_property "spark.space.11" 4 \ "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" "\"-Dcherry=berry\"" test_quote_java_property "spark.space.12" 4 \ "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" "\"-Dcherry=berry\"" -echo +log_error # Final test result if [[ "$tests_failed" == 0 ]]; then - echo "**********************" - echo " TESTS PASS " - echo "**********************" + echo "BASH tests passed." else - echo "XXXXXXXXXXXXXXXXXXXXXXXX" - echo "XXXXX TESTS FAILED XXXXX" - echo "XXXXXXXXXXXXXXXXXXXXXXXX" + echo -e "XXXXX BASH tests failed XXXXX\n" + echo -e "$error_output_buffer" exit 1 fi diff --git a/dev/run-tests b/dev/run-tests index 0e24515d1376..0a10cfda2833 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -84,6 +84,12 @@ echo "Running Python style checks" echo "=========================================================================" dev/lint-python +echo "" +echo "=========================================================================" +echo "Running BASH tests" +echo "=========================================================================" +bin/run-tests + echo "" echo "=========================================================================" echo "Running Spark unit tests" From 56ac247400b3121abb4c0fa01bf7fc5a4fced60c Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 15 Aug 2014 20:31:31 -0700 Subject: [PATCH 28/53] Use eval and set to simplify splitting --- bin/utils.sh | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/bin/utils.sh b/bin/utils.sh index 1eaff0e50365..826109566796 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -58,36 +58,10 @@ function parse_java_property() { # Properly split java options, dealing with whitespace, double quotes and backslashes. # This accepts a string and returns the resulting list through SPLIT_JAVA_OPTS. +# For security reasons, this is isolated in its own function. 
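+# As a sketch of the intended behavior (the option names below are made up): for the
+# input string -Dkey="a b" -Dflag, eval re-tokenizes it so that SPLIT_JAVA_OPTS holds
+# the two elements "-Dkey=a b" and "-Dflag". Since eval also performs shell expansion,
+# only trusted configuration values should ever be passed in here.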
function split_java_options() { - SPLIT_JAVA_OPTS=() # return value - option_buffer="" # buffer for collecting parts of an option - opened_quotes=0 # whether we are expecting a closing double quotes - for word in $1; do - num_quotes=$(echo "$word" | sed "s/\\\\\"//g" | grep -o "\"" | grep -c .) - if [[ $((num_quotes % 2)) == 1 ]]; then - # Flip the bit - opened_quotes=$(((opened_quotes + 1) % 2)) - fi - if [[ $opened_quotes == 0 ]]; then - # Remove all non-escaped quotes around the value - SPLIT_JAVA_OPTS+=("$( - echo "$option_buffer $word" | \ - sed "s/^[[:space:]]*//" | \ - sed "s/\([^\\]\)\"/\1/g" | \ - sed "s/\\\\\([\\\"]\)/\1/g" - )") - option_buffer="" - else - # We are expecting a closing double quote, so keep buffering - option_buffer="$option_buffer $word" - fi - done - # Something is wrong if we ended with open double quotes - if [[ $opened_quotes == 1 ]]; then - echo -e "Java options parse error! Expecting closing double quotes:" 1>&2 - echo -e " ${SPLIT_JAVA_OPTS[@]}" 1>&2 - exit 1 - fi + eval set -- "$1" + SPLIT_JAVA_OPTS=("$@") export SPLIT_JAVA_OPTS } From bd0d468de50d08c661f5239e48a32b4c82790ed0 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 15 Aug 2014 20:52:16 -0700 Subject: [PATCH 29/53] Simplify parsing config file by ignoring multi-line arguments --- bin/utils.sh | 42 +++++++++++------------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/bin/utils.sh b/bin/utils.sh index 826109566796..066a73e00090 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -21,37 +21,17 @@ # * ---------------------------------------------------- * # Parse the value of a config from a java properties file according to the specifications in -# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). -# This accepts the name of the config as an argument, and expects the path of the property -# file to be found in PROPERTIES_FILE. The value is returned through JAVA_PROPERTY_VALUE. +# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader), +# with the exception of the support for multi-line arguments. This accepts the name of the +# config as an argument, and expects the path of the property file to be found in +# PROPERTIES_FILE. The value is returned through JAVA_PROPERTY_VALUE. 
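+#
+# For example, under the delimiter rules above, each of these hypothetical lines is
+# expected to yield JAVA_PROPERTY_VALUE="-Dfoo=bar" for the key "spark.test.key":
+#
+#   spark.test.key -Dfoo=bar
+#   spark.test.key=-Dfoo=bar
+#   spark.test.key: -Dfoo=bar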
function parse_java_property() {
- JAVA_PROPERTY_VALUE="" # return value
- config_buffer="" # buffer for collecting parts of a config value
- multi_line=0 # whether this config spans multiple lines
- while read -r line; do
- # Strip leading and trailing whitespace
- line=$(echo "$line" | sed "s/^[[:space:]]\(.*\)[[:space:]]*$/\1/")
- contains_config=$(echo "$line" | grep -e "^$1")
- if [[ -n "$contains_config" || "$multi_line" == 1 ]]; then
- has_more_lines=$(echo "$line" | grep -e "\\\\$")
- if [[ -n "$has_more_lines" ]]; then
- # Strip trailing backslash
- line=$(echo "$line" | sed "s/\\\\$//")
- config_buffer="$config_buffer $line"
- multi_line=1
- else
- JAVA_PROPERTY_VALUE="$config_buffer $line"
- break
- fi
- fi
- done < "$PROPERTIES_FILE"
-
- # Actually extract the value of the config
- JAVA_PROPERTY_VALUE=$( \
- echo "$JAVA_PROPERTY_VALUE" | \
- sed "s/$1//" | \
- sed "s/^[[:space:]]*[:=]\{0,1\}//" | \
- sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \
+ JAVA_PROPERTY_VALUE=$(\
+ grep "^[[:space:]]*$1" "$PROPERTIES_FILE" | \
+ sed "s/^[[:space:]]*$1//g" | \
+ sed "s/^[[:space:]]*[:=]\{0,1\}//g" | \
+ sed "s/^[[:space:]]*//g" | \
+ sed "s/[[:space:]]*$//g" \
 )
 export JAVA_PROPERTY_VALUE
 }
@@ -82,7 +62,6 @@ function quote_java_property() {
 # Gather all spark-submit options into SUBMISSION_OPTS
 function gatherSparkSubmitOpts() {
-
 if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
 echo "Function for printing usage of $0 is not set." 1>&2
 echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
@@ -120,3 +99,4 @@ function gatherSparkSubmitOpts() {
 export SUBMISSION_OPTS
 export APPLICATION_OPTS
 }
+
From be99eb3f124dc3f2df3408e4bff6bfa9ad2362e2 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Fri, 15 Aug 2014 20:58:13 -0700
Subject: [PATCH 30/53] Fix tests to not include multi-line configs
---
 bin/run-tests | 12 ------------
 bin/test.conf | 21 ---------------------
 bin/utils.sh | 1 +
 3 files changed, 1 insertion(+), 33 deletions(-)
diff --git a/bin/run-tests b/bin/run-tests
index 58b111244d16..18e02d902469 100755
--- a/bin/run-tests
+++ b/bin/run-tests
@@ -173,10 +173,6 @@ for delimiter in "${delimiters[@]}"; do
 "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \""
 test_parse_java_property "spark.$delimiter.10" \
 "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" IGNORE_WHITESPACE
- test_parse_java_property "spark.$delimiter.11" \
- "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \" -Dcherry=berry" IGNORE_WHITESPACE
- test_parse_java_property "spark.$delimiter.12" \
- "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \" -Dcherry=berry" IGNORE_WHITESPACE
 done
 log_error
@@ -197,10 +193,6 @@ test_split_java_options "spark.space.9" 3 \
 "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ "
 test_split_java_options "spark.space.10" 3 \
 "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ "
-test_split_java_options "spark.space.11" 4 \
- "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " "-Dcherry=berry"
-test_split_java_options "spark.space.12" 4 \
- "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " "-Dcherry=berry"
 log_error
 # Test quote_java_property. Note that this relies on split_java_options to work correctly.
@@ -220,10 +212,6 @@ test_quote_java_property "spark.space.9" 3 \ "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" test_quote_java_property "spark.space.10" 3 \ "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" -test_quote_java_property "spark.space.11" 4 \ - "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" "\"-Dcherry=berry\"" -test_quote_java_property "spark.space.12" 4 \ - "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" "\"-Dcherry=berry\"" log_error # Final test result diff --git a/bin/test.conf b/bin/test.conf index cb82e2e684dd..437d732d7565 100644 --- a/bin/test.conf +++ b/bin/test.conf @@ -18,13 +18,6 @@ spark.space.7 -Dstraw=berry -Dblue=berry -Dblack=berry spark.space.8 -Dstraw="berry space" -Dblue="berry" -Dblack=berry spark.space.9 -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " spark.space.10 -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " -spark.space.11 \ - -Dstraw="berry space" \ - -Dblue="berry \"quotes\"" \ - -Dblack="berry \\backslashes\\ " \ - -Dcherry=berry -spark.space.12 -Dstraw="berry space" -Dblue="berry \"quotes\"" \ - -Dblack="berry \\backslashes\\ " -Dcherry=berry # Equal sign delimiter spark.equal.1=-Dstraw=berry @@ -37,13 +30,6 @@ spark.equal.7=-Dstraw=berry -Dblue=berry -Dblack=berry spark.equal.8=-Dstraw="berry space" -Dblue="berry" -Dblack=berry spark.equal.9=-Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " spark.equal.10 = -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " -spark.equal.11=\ - -Dstraw="berry space" \ - -Dblue="berry \"quotes\"" \ - -Dblack="berry \\backslashes\\ " \ - -Dcherry=berry -spark.equal.12=-Dstraw="berry space" -Dblue="berry \"quotes\"" \ - -Dblack="berry \\backslashes\\ " -Dcherry=berry # Colon delimiter spark.colon.1:-Dstraw=berry @@ -56,11 +42,4 @@ spark.colon.7:-Dstraw=berry -Dblue=berry -Dblack=berry spark.colon.8:-Dstraw="berry space" -Dblue="berry" -Dblack=berry spark.colon.9:-Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " spark.colon.10 : -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " -spark.colon.11:\ - -Dstraw="berry space" \ - -Dblue="berry \"quotes\"" \ - -Dblack="berry \\backslashes\\ " \ - -Dcherry=berry -spark.colon.12:-Dstraw="berry space" -Dblue="berry \"quotes\"" \ - -Dblack="berry \\backslashes\\ " -Dcherry=berry diff --git a/bin/utils.sh b/bin/utils.sh index 066a73e00090..c9ed977be143 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -28,6 +28,7 @@ function parse_java_property() { JAVA_PROPERTY_VALUE=$(\ grep "^[[:space:]]*$1" "$PROPERTIES_FILE" | \ + head -n 1 | \ sed "s/^[[:space:]]*$1//g" | \ sed "s/^[[:space:]]*[:=]\{0,1\}//g" | \ sed "s/^[[:space:]]*//g" | \ From 371cac42154ac263d5c94c567c1f222e2edd7e38 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 15 Aug 2014 20:59:44 -0700 Subject: [PATCH 31/53] Add function prefix (minor) --- bin/run-tests | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/run-tests b/bin/run-tests index 18e02d902469..ce489f33c001 100755 --- a/bin/run-tests +++ b/bin/run-tests @@ -36,7 +36,7 @@ error_output_buffer="" temp_output_buffer="" # Echo only if the verbose flag is set -verbose_echo() { +function verbose_echo() { if [[ -n "$SPARK_TESTING_VERBOSE" ]]; then echo -e "$1" fi @@ 
-44,7 +44,7 @@ verbose_echo() { # Collect error output for echoing at the end if tests fail # This also echoes the given string if the verbose flag is set -log_error() { +function log_error() { verbose_echo "$1" if [[ -n "$error_output_buffer" ]]; then error_output_buffer=$(echo -e "$error_output_buffer\n$1") @@ -54,7 +54,7 @@ log_error() { } # Collect temporary output for logging -collect_temp_output() { +function collect_temp_output() { if [[ -n "$temp_output_buffer" ]]; then temp_output_buffer=$(echo -e "$temp_output_buffer\n$1") else @@ -63,7 +63,7 @@ collect_temp_output() { } # Print the result of an individual test -echo_test_result() { +function echo_test_result() { if [[ "$this_test_failed" == 1 ]]; then log_error "$temp_output_buffer" tests_failed=1 @@ -74,7 +74,7 @@ echo_test_result() { # Test parse_java_property. This takes in three parameters, the name of the config, # the expected value, and whether or not to ignore whitespace (e.g. for multi-line). -test_parse_java_property() { +function test_parse_java_property() { key="$1" expected_value="$2" ignore_whitespace="$3" @@ -99,7 +99,7 @@ test_parse_java_property() { # Test split_java_options. This takes in three or more parameters, the name of the config, # the expected number of java options, and values of the java options themselves. -test_split_java_options() { +function test_split_java_options() { key="$1" expected_size="$2" expected_values=("${@:3}") @@ -131,7 +131,7 @@ test_split_java_options() { # Test split_java_options. This takes in three or more parameters, the name of the config, # the expected number of java options, and values of the java options themselves. -test_quote_java_property() { +function test_quote_java_property() { key="$1" expected_size="$2" expected_values=("${@:3}") From fa11ef8d382a8c21518b28605bc381738f688da3 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 15 Aug 2014 21:11:14 -0700 Subject: [PATCH 32/53] Parse the properties file only if the special configs exist --- bin/spark-submit | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index 0eab865a5baf..73555b29835f 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -60,22 +60,27 @@ unset EXECUTOR_EXTRA_JAVA_OPTIONS # if [[ -f "$PROPERTIES_FILE" ]]; then echo "Using properties file $PROPERTIES_FILE." 
1>&2
- # This exports the value of the given key into JAVA_PROPERTY_VALUE
- parse_java_property "spark.driver.memory"
- DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
- parse_java_property "spark.driver.extraLibraryPath"
- DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
- parse_java_property "spark.driver.extraClassPath"
- DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
- parse_java_property "spark.driver.extraJavaOptions"
- DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
- parse_java_property "spark.executor.extraJavaOptions"
- EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
- if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
- export DRIVER_EXTRA_JAVA_OPTS
- fi
- if [[ -n "$EXECUTOR_EXTRA_JAVA_OPTS" ]]; then
- export EXECUTOR_EXTRA_JAVA_OPTS
+ # Parse the properties file here only if these special configs exist
+ should_parse=$(grep -e "spark.driver.extra*\|spark.*.extraJavaOptions" "$PROPERTIES_FILE")
+ if [[ -n "$should_parse" ]]; then
+ # This exports the value of the given key into JAVA_PROPERTY_VALUE
+ parse_java_property "spark.driver.memory"
+ DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
+ parse_java_property "spark.driver.extraLibraryPath"
+ DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
+ parse_java_property "spark.driver.extraClassPath"
+ DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
+ parse_java_property "spark.driver.extraJavaOptions"
+ DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
+ parse_java_property "spark.executor.extraJavaOptions"
+ EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
+ # Export these for SparkSubmitArguments.scala to consume
+ if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
+ export DRIVER_EXTRA_JAVA_OPTS
+ fi
+ if [[ -n "$EXECUTOR_EXTRA_JAVA_OPTS" ]]; then
+ export EXECUTOR_EXTRA_JAVA_OPTS
+ fi
 fi
 elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then
 echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2
From 7396be27b7bbaaf8e28ba6958d94867b3c35ff02 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Fri, 15 Aug 2014 21:14:24 -0700
Subject: [PATCH 33/53] Explicitly comment that multi-line properties are not supported
---
 conf/spark-defaults.conf.template | 1 +
 1 file changed, 1 insertion(+)
diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template
index ad7273d830c1..80b72b0ca2d2 100644
--- a/conf/spark-defaults.conf.template
+++ b/conf/spark-defaults.conf.template
@@ -1,5 +1,6 @@
 # Default system properties included when running spark-submit.
 # This is useful for setting default environmental settings.
+# Note that properties that span multiple lines are not supported.
 # Example:
 # spark.master spark://master:7077
From c886568687adfda65ae27d0fefa184b345a770ad Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Fri, 15 Aug 2014 21:25:47 -0700
Subject: [PATCH 34/53] Fix lines too long + a few comments / style (minor)
---
 bin/pyspark | 3 ++-
 bin/run-tests | 31 ++++++++++++++++++++-----------
 bin/spark-class | 2 +-
 bin/spark-shell | 4 +++-
 4 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/bin/pyspark b/bin/pyspark
index 01d42025c978..35a20ae4fcb2 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -23,7 +23,8 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
-source $FWDIR/bin/utils.sh
+# Load utility functions
+. "$FWDIR/bin/utils.sh"
 SCALA_VERSION=2.10
diff --git a/bin/run-tests b/bin/run-tests
index ce489f33c001..a514642e317b 100755
--- a/bin/run-tests
+++ b/bin/run-tests
@@ -72,8 +72,8 @@ function echo_test_result() {
 fi
 }
-# Test parse_java_property.
This takes in three parameters, the name of the config, -# the expected value, and whether or not to ignore whitespace (e.g. for multi-line). +# Test parse_java_property. This takes in three parameters, the name of +# the config, the expected value, and whether or not to ignore whitespace. function test_parse_java_property() { key="$1" expected_value="$2" @@ -166,20 +166,25 @@ for delimiter in "${delimiters[@]}"; do test_parse_java_property "spark.$delimiter.3" "-Dstraw=\"berry again\"" test_parse_java_property "spark.$delimiter.4" "-Dstraw=\"berry \\\"quote\"" test_parse_java_property "spark.$delimiter.5" "-Dstraw=\"berry \\\\backslash\"" - test_parse_java_property "spark.$delimiter.6" "-Dstraw=\"berry \\\"quotes\\\" and \\\\backslashes\\\\ \"" - test_parse_java_property "spark.$delimiter.7" "-Dstraw=berry -Dblue=berry -Dblack=berry" - test_parse_java_property "spark.$delimiter.8" "-Dstraw=\"berry space\" -Dblue=\"berry\" -Dblack=berry" + test_parse_java_property "spark.$delimiter.6" \ + "-Dstraw=\"berry \\\"quotes\\\" and \\\\backslashes\\\\ \"" + test_parse_java_property "spark.$delimiter.7" \ + "-Dstraw=berry -Dblue=berry -Dblack=berry" + test_parse_java_property "spark.$delimiter.8" \ + "-Dstraw=\"berry space\" -Dblue=\"berry\" -Dblack=berry" test_parse_java_property "spark.$delimiter.9" \ "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" test_parse_java_property "spark.$delimiter.10" \ - "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" IGNORE_WHITESPACE + "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" \ + IGNORE_WHITESPACE done log_error # Test split_java_options. Note that this relies on parse_java_property to work correctly. log_error "--- Testing split_java_options ---" if [[ "$tests_failed" == 1 ]]; then - log_error "* WARNING: Tests for parse_java_property failed! This should also fail tests for split_java_options" + log_error "* WARNING: Tests for parse_java_property failed!" + log_error "This should also fail tests for split_java_options" fi test_split_java_options "spark.space.1" 1 "-Dstraw=berry" test_split_java_options "spark.space.2" 1 "-Dstraw=berry" @@ -198,16 +203,20 @@ log_error # Test quote_java_property. Note that this relies on split_java_options to work correctly. log_error "--- Testing quote_java_property ---" if [[ "$tests_failed" == 1 ]]; then - log_error "* WARNING: Tests for split_java_options failed! This should also fail tests for quote_java_property" + log_error "* WARNING: Tests for split_java_options failed!" 
+ log_error "This should also fail tests for quote_java_property" fi test_quote_java_property "spark.space.1" 1 "\"-Dstraw=berry\"" test_quote_java_property "spark.space.2" 1 "\"-Dstraw=berry\"" test_quote_java_property "spark.space.3" 1 "\"-Dstraw=berry again\"" test_quote_java_property "spark.space.4" 1 "\"-Dstraw=berry \"quote\"" test_quote_java_property "spark.space.5" 1 "\"-Dstraw=berry \\backslash\"" -test_quote_java_property "spark.space.6" 1 "\"-Dstraw=berry \"quotes\" and \\backslashes\\ \"" -test_quote_java_property "spark.space.7" 3 "\"-Dstraw=berry\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" -test_quote_java_property "spark.space.8" 3 "\"-Dstraw=berry space\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" +test_quote_java_property "spark.space.6" 1 \ + "\"-Dstraw=berry \"quotes\" and \\backslashes\\ \"" +test_quote_java_property "spark.space.7" 3 \ + "\"-Dstraw=berry\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" +test_quote_java_property "spark.space.8" 3 \ + "\"-Dstraw=berry space\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" test_quote_java_property "spark.space.9" 3 \ "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" test_quote_java_property "spark.space.10" 3 \ diff --git a/bin/spark-class b/bin/spark-class index ce5bebe5929e..8390af449288 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -157,7 +157,7 @@ fi export CLASSPATH if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then - # Put quotes around system properties in case they contain spaces + # Put quotes around system properties in case they contain spaces for readability # This exports the resulting list of java opts into QUOTED_JAVA_OPTS quote_java_property "${SPLIT_JAVA_OPTS[@]}" echo -n "Spark Command: " 1>&2 diff --git a/bin/spark-shell b/bin/spark-shell index 8b7ccd743955..d3381179d8bc 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -41,7 +41,9 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then usage fi -source $FWDIR/bin/utils.sh +# Load utility functions +. "$FWDIR/bin/utils.sh" + SUBMIT_USAGE_FUNCTION=usage gatherSparkSubmitOpts "$@" From 0effa1ee5ed302f0533522431db3620919dfbe61 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 18 Aug 2014 18:13:13 -0700 Subject: [PATCH 35/53] Add code in Scala that handles special configs The eventual goal of this is to shift the current complex BASH logic to Scala. The new class should be invoked from `spark-class`. For simplicity, this currently does not handle SPARK-2914. It is likely that this will be dealt with in a future PR instead. --- .../apache/spark/api/python/PythonUtils.scala | 25 ---- .../api/python/PythonWorkerFactory.scala | 3 +- .../apache/spark/deploy/PythonRunner.scala | 4 +- .../spark/deploy/SparkClassLauncher.scala | 118 ++++++++++++++++++ .../scala/org/apache/spark/util/Utils.scala | 21 ++++ 5 files changed, 142 insertions(+), 29 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 52c70712eea3..be5ebfa9219d 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -40,28 +40,3 @@ private[spark] object PythonUtils { paths.filter(_ != "").mkString(File.pathSeparator) } } - - -/** - * A utility class to redirect the child process's stdout or stderr. 
- */ -private[spark] class RedirectThread( - in: InputStream, - out: OutputStream, - name: String) - extends Thread(name) { - - setDaemon(true) - override def run() { - scala.util.control.Exception.ignoring(classOf[IOException]) { - // FIXME: We copy the stream on the level of bytes to avoid encoding problems. - val buf = new Array[Byte](1024) - var len = in.read(buf) - while (len != -1) { - out.write(buf, 0, len) - out.flush() - len = in.read(buf) - } - } - } -} diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala index bf716a8ab025..4c4796f6c59b 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala @@ -17,7 +17,6 @@ package org.apache.spark.api.python -import java.lang.Runtime import java.io.{DataOutputStream, DataInputStream, InputStream, OutputStreamWriter} import java.net.{InetAddress, ServerSocket, Socket, SocketException} @@ -25,7 +24,7 @@ import scala.collection.mutable import scala.collection.JavaConversions._ import org.apache.spark._ -import org.apache.spark.util.Utils +import org.apache.spark.util.{RedirectThread, Utils} private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String]) extends Logging { diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 0d6751f3fa6d..b66c3ba4d5fb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -22,8 +22,8 @@ import java.net.URI import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ -import org.apache.spark.api.python.{PythonUtils, RedirectThread} -import org.apache.spark.util.Utils +import org.apache.spark.api.python.PythonUtils +import org.apache.spark.util.{RedirectThread, Utils} /** * A main class used by spark-submit to launch Python applications. It executes python as a diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala new file mode 100644 index 000000000000..8acabc591cb2 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy + +import java.io.File + +import scala.collection.JavaConversions._ + +import org.apache.spark.util.{RedirectThread, Utils} + +/** + * Wrapper of `bin/spark-class` that prepares the launch environment of the child JVM properly. 
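+ *
+ * This extra hop is needed in client mode because the `spark.driver.*` settings
+ * describe how the driver JVM itself must be launched (its class path, library path,
+ * java options and memory), none of which can be changed once that JVM has started.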
+ */ +object SparkClassLauncher { + + /** + * Launch a Spark class with the given class paths, library paths, java options and memory. + * If we are launching an application through Spark submit in client mode, we must also + * take into account special `spark.driver.*` properties needed to start the driver JVM. + */ + def main(args: Array[String]): Unit = { + if (args.size < 8) { + System.err.println( + """ + |Usage: org.apache.spark.deploy.SparkClassLauncher + | + | [properties file] - path to your Spark properties file + | [java runner] - command to launch the child JVM + | [java class paths] - class paths to pass to the child JVM + | [java library paths] - library paths to pass to the child JVM + | [java opts] - java options to pass to the child JVM + | [java memory] - memory used to launch the child JVM + | [client mode] - whether the child JVM will run the Spark driver + | [main class] - main class to run in the child JVM + |
- arguments passed to this main class + | + |Example: + | org.apache.spark.deploy.SparkClassLauncher.SparkClassLauncher + | conf/spark-defaults.conf java /classpath1:/classpath2 /librarypath1:/librarypath2 + | "-XX:-UseParallelGC -Dsome=property" 5g true org.apache.spark.deploy.SparkSubmit + | --master local --class org.apache.spark.examples.SparkPi 10 + """.stripMargin) + System.exit(1) + } + val propertiesFile = args(0) + val javaRunner = args(1) + val clClassPaths = args(2) + val clLibraryPaths = args(3) + val clJavaOpts = args(4) + val clJavaMemory = args(5) + val clientMode = args(6) == "true" + val mainClass = args(7) + + // In client deploy mode, parse the properties file for certain `spark.driver.*` configs. + // These configs encode java options, class paths, and library paths needed to launch the JVM. + val properties = + if (clientMode) { + SparkSubmitArguments.getPropertiesFromFile(new File(propertiesFile)).toMap + } else { + Map[String, String]() + } + val confDriverMemory = properties.get("spark.driver.memory") + val confClassPaths = properties.get("spark.driver.extraClassPath") + val confLibraryPaths = properties.get("spark.driver.extraLibraryPath") + val confJavaOpts = properties.get("spark.driver.extraJavaOptions") + + // Merge relevant command line values with the config equivalents, if any + val javaMemory = + if (clientMode) { + confDriverMemory.getOrElse(clJavaMemory) + } else { + clJavaMemory + } + val pathSeparator = sys.props("path.separator") + val classPaths = clClassPaths + confClassPaths.map(pathSeparator + _).getOrElse("") + val libraryPaths = clLibraryPaths + confLibraryPaths.map(pathSeparator + _).getOrElse("") + val javaOpts = Utils.splitCommandString(clJavaOpts) ++ + confJavaOpts.map(Utils.splitCommandString).getOrElse(Seq.empty) + val filteredJavaOpts = javaOpts.filterNot { opt => + opt.startsWith("-Djava.library.path") || opt.startsWith("-Xms") || opt.startsWith("-Xmx") + } + + // Build up command + val command: Seq[String] = + Seq(javaRunner) ++ + { if (classPaths.nonEmpty) Seq("-cp", classPaths) else Seq.empty } ++ + { if (libraryPaths.nonEmpty) Seq(s"-Djava.library.path=$libraryPaths") else Seq.empty } ++ + filteredJavaOpts ++ + Seq(s"-Xms$javaMemory", s"-Xmx$javaMemory") ++ + Seq(mainClass) ++ + args.slice(8, args.size) + + command.foreach(println) + + val builder = new ProcessBuilder(command) + val process = builder.start() + new RedirectThread(System.in, process.getOutputStream, "redirect stdin").start() + new RedirectThread(process.getInputStream, System.out, "redirect stdout").start() + new RedirectThread(process.getErrorStream, System.err, "redirect stderr").start() + System.exit(process.waitFor()) + } + +} diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 019f68b16089..79fd4060f57e 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1421,3 +1421,24 @@ private[spark] object Utils extends Logging { } } + +/** + * A utility class to redirect the child process's stdout or stderr. + */ +private[spark] class RedirectThread(in: InputStream, out: OutputStream, name: String) + extends Thread(name) { + + setDaemon(true) + override def run() { + scala.util.control.Exception.ignoring(classOf[IOException]) { + // FIXME: We copy the stream on the level of bytes to avoid encoding problems. 
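+ // Copy in 1 KB chunks until EOF (read returns -1), flushing after each chunk so
+ // the child process's output is not delayed by buffering.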
+ val buf = new Array[Byte](1024) + var len = in.read(buf) + while (len != -1) { + out.write(buf, 0, len) + out.flush() + len = in.read(buf) + } + } + } +} From a396eda8354908fe2937d0e82853d2ac6270eef8 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 18 Aug 2014 19:01:25 -0700 Subject: [PATCH 36/53] Nullify my own hard work to simplify bash :( --- bin/run-tests | 234 ------------------ bin/spark-class | 33 ++- bin/spark-submit | 70 +----- bin/test.conf | 45 ---- bin/utils.sh | 41 --- conf/spark-defaults.conf.template | 3 +- .../spark/deploy/SparkClassLauncher.scala | 9 +- .../spark/deploy/SparkSubmitArguments.scala | 9 - 8 files changed, 34 insertions(+), 410 deletions(-) delete mode 100755 bin/run-tests delete mode 100644 bin/test.conf diff --git a/bin/run-tests b/bin/run-tests deleted file mode 100755 index a514642e317b..000000000000 --- a/bin/run-tests +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This file tests the various functionalities in bin/utils.sh -# -# By default, this prints only the relevant error output at the end if tests fail. -# For debugging, the user can set SPARK_TESTING_VERBOSE to print more information -# while tests are still running. -# -# This script returns an exit code of 1 on test failure. - -SPARK_HOME="$(cd `dirname $0`/..; pwd)" -PROPERTIES_FILE="$SPARK_HOME/bin/test.conf" - -# Load utility functions -. "$SPARK_HOME/bin/utils.sh" - -tests_failed=0 -this_test_failed=0 -error_output_buffer="" -temp_output_buffer="" - -# Echo only if the verbose flag is set -function verbose_echo() { - if [[ -n "$SPARK_TESTING_VERBOSE" ]]; then - echo -e "$1" - fi -} - -# Collect error output for echoing at the end if tests fail -# This also echoes the given string if the verbose flag is set -function log_error() { - verbose_echo "$1" - if [[ -n "$error_output_buffer" ]]; then - error_output_buffer=$(echo -e "$error_output_buffer\n$1") - else - error_output_buffer="$1" - fi -} - -# Collect temporary output for logging -function collect_temp_output() { - if [[ -n "$temp_output_buffer" ]]; then - temp_output_buffer=$(echo -e "$temp_output_buffer\n$1") - else - temp_output_buffer="$1" - fi -} - -# Print the result of an individual test -function echo_test_result() { - if [[ "$this_test_failed" == 1 ]]; then - log_error "$temp_output_buffer" - tests_failed=1 - else - verbose_echo "$temp_output_buffer" - fi -} - -# Test parse_java_property. This takes in three parameters, the name of -# the config, the expected value, and whether or not to ignore whitespace. 
-function test_parse_java_property() { - key="$1" - expected_value="$2" - ignore_whitespace="$3" - temp_output_buffer="" - this_test_failed=0 - parse_java_property "$key" - actual_value="$JAVA_PROPERTY_VALUE" - collect_temp_output " $key -> $actual_value" - # Ignore whitespace for multi-line arguments - if [[ -n "$ignore_whitespace" ]]; then - expected_value=$(echo "$expected_value" | sed "s/[[:space:]]//g") - actual_value=$(echo "$actual_value" | sed "s/[[:space:]]//g") - fi - if [[ "$actual_value" != "$expected_value" ]]; then - collect_temp_output " XXXXX TEST FAILED XXXXX" - collect_temp_output " expected: $expected_value" - collect_temp_output " actual: $actual_value" - this_test_failed=1 - fi - echo_test_result -} - -# Test split_java_options. This takes in three or more parameters, the name of the config, -# the expected number of java options, and values of the java options themselves. -function test_split_java_options() { - key="$1" - expected_size="$2" - expected_values=("${@:3}") - temp_output_buffer="" - this_test_failed=0 - parse_java_property "$key" - collect_temp_output " $JAVA_PROPERTY_VALUE" - split_java_options "$JAVA_PROPERTY_VALUE" - if [[ "$expected_size" != "${#SPLIT_JAVA_OPTS[@]}" ]]; then - collect_temp_output " XXXXX TEST FAILED XXXXX" - collect_temp_output " expected size: $expected_size" - collect_temp_output " actual size: ${#SPLIT_JAVA_OPTS[@]}" - this_test_failed=1 - fi - for i in $(seq 0 $((expected_size - 1))); do - expected_value="${expected_values[$i]}" - actual_value="${SPLIT_JAVA_OPTS[$i]}" - collect_temp_output " -> $actual_value" - if [[ "$expected_value" != "$actual_value" ]]; then - collect_temp_output " XXXXX TEST FAILED (key $key) XXXXX" - collect_temp_output " expected value: $expected_value" - collect_temp_output " actual value: $actual_value" - this_test_failed=1 - break - fi - done - echo_test_result -} - -# Test split_java_options. This takes in three or more parameters, the name of the config, -# the expected number of java options, and values of the java options themselves. -function test_quote_java_property() { - key="$1" - expected_size="$2" - expected_values=("${@:3}") - temp_output_buffer="" - this_test_failed=0 - parse_java_property "$key" - split_java_options "$JAVA_PROPERTY_VALUE" - quote_java_property "${SPLIT_JAVA_OPTS[@]}" - collect_temp_output " $JAVA_PROPERTY_VALUE" - for i in $(seq 0 $((expected_size - 1))); do - expected_value="${expected_values[$i]}" - actual_value="${QUOTED_JAVA_OPTS[$i]}" - collect_temp_output " -> $actual_value" - if [[ "$expected_value" != "$actual_value" ]]; then - collect_temp_output " XXXXX TEST FAILED (key $key) XXXXX" - collect_temp_output " expected value: $expected_value" - collect_temp_output " actual value: $actual_value" - this_test_failed=1 - break - fi - done - echo_test_result -} - -# Test parse_java_property. This should read the literal value as written in the conf file. 
-log_error "--- Testing parse_java_property ---" -delimiters=("space" "equal" "colon") -test_parse_java_property "does.not.exist" "" -for delimiter in "${delimiters[@]}"; do - test_parse_java_property "spark.$delimiter.1" "-Dstraw=berry" - test_parse_java_property "spark.$delimiter.2" "-Dstraw=\"berry\"" - test_parse_java_property "spark.$delimiter.3" "-Dstraw=\"berry again\"" - test_parse_java_property "spark.$delimiter.4" "-Dstraw=\"berry \\\"quote\"" - test_parse_java_property "spark.$delimiter.5" "-Dstraw=\"berry \\\\backslash\"" - test_parse_java_property "spark.$delimiter.6" \ - "-Dstraw=\"berry \\\"quotes\\\" and \\\\backslashes\\\\ \"" - test_parse_java_property "spark.$delimiter.7" \ - "-Dstraw=berry -Dblue=berry -Dblack=berry" - test_parse_java_property "spark.$delimiter.8" \ - "-Dstraw=\"berry space\" -Dblue=\"berry\" -Dblack=berry" - test_parse_java_property "spark.$delimiter.9" \ - "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" - test_parse_java_property "spark.$delimiter.10" \ - "-Dstraw=\"berry space\" -Dblue=\"berry \\\"quotes\\\"\" -Dblack=\"berry \\\\backslashes\\\\ \"" \ - IGNORE_WHITESPACE -done -log_error - -# Test split_java_options. Note that this relies on parse_java_property to work correctly. -log_error "--- Testing split_java_options ---" -if [[ "$tests_failed" == 1 ]]; then - log_error "* WARNING: Tests for parse_java_property failed!" - log_error "This should also fail tests for split_java_options" -fi -test_split_java_options "spark.space.1" 1 "-Dstraw=berry" -test_split_java_options "spark.space.2" 1 "-Dstraw=berry" -test_split_java_options "spark.space.3" 1 "-Dstraw=berry again" -test_split_java_options "spark.space.4" 1 "-Dstraw=berry \"quote" -test_split_java_options "spark.space.5" 1 "-Dstraw=berry \\backslash" -test_split_java_options "spark.space.6" 1 "-Dstraw=berry \"quotes\" and \\backslashes\\ " -test_split_java_options "spark.space.7" 3 "-Dstraw=berry" "-Dblue=berry" "-Dblack=berry" -test_split_java_options "spark.space.8" 3 "-Dstraw=berry space" "-Dblue=berry" "-Dblack=berry" -test_split_java_options "spark.space.9" 3 \ - "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " -test_split_java_options "spark.space.10" 3 \ - "-Dstraw=berry space" "-Dblue=berry \"quotes\"" "-Dblack=berry \\backslashes\\ " -log_error - -# Test quote_java_property. Note that this relies on split_java_options to work correctly. -log_error "--- Testing quote_java_property ---" -if [[ "$tests_failed" == 1 ]]; then - log_error "* WARNING: Tests for split_java_options failed!" 
- log_error "This should also fail tests for quote_java_property" -fi -test_quote_java_property "spark.space.1" 1 "\"-Dstraw=berry\"" -test_quote_java_property "spark.space.2" 1 "\"-Dstraw=berry\"" -test_quote_java_property "spark.space.3" 1 "\"-Dstraw=berry again\"" -test_quote_java_property "spark.space.4" 1 "\"-Dstraw=berry \"quote\"" -test_quote_java_property "spark.space.5" 1 "\"-Dstraw=berry \\backslash\"" -test_quote_java_property "spark.space.6" 1 \ - "\"-Dstraw=berry \"quotes\" and \\backslashes\\ \"" -test_quote_java_property "spark.space.7" 3 \ - "\"-Dstraw=berry\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" -test_quote_java_property "spark.space.8" 3 \ - "\"-Dstraw=berry space\"" "\"-Dblue=berry\"" "\"-Dblack=berry\"" -test_quote_java_property "spark.space.9" 3 \ - "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" -test_quote_java_property "spark.space.10" 3 \ - "\"-Dstraw=berry space\"" "\"-Dblue=berry \"quotes\"\"" "\"-Dblack=berry \\backslashes\\ \"" -log_error - -# Final test result -if [[ "$tests_failed" == 0 ]]; then - echo "BASH tests passed." -else - echo -e "XXXXX BASH tests failed XXXXX\n" - echo -e "$error_output_buffer" - exit 1 -fi - diff --git a/bin/spark-class b/bin/spark-class index 8390af449288..7a9203cfce47 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -30,9 +30,6 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" -# Load utility functions -. "$SPARK_HOME/bin/utils.sh" - . $FWDIR/bin/load-spark-env.sh if [ -z "$1" ]; then @@ -112,10 +109,6 @@ if [ -e "$FWDIR/conf/java-opts" ] ; then JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" fi -# Split JAVA_OPTS properly to handle whitespace, double quotes and backslashes -# This exports the split java options into SPLIT_JAVA_OPTS -split_java_options "$JAVA_OPTS" - # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala! TOOLS_DIR="$FWDIR"/tools @@ -156,13 +149,27 @@ if $cygwin; then fi export CLASSPATH -if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then - # Put quotes around system properties in case they contain spaces for readability - # This exports the resulting list of java opts into QUOTED_JAVA_OPTS - quote_java_property "${SPLIT_JAVA_OPTS[@]}" +if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then echo -n "Spark Command: " 1>&2 - echo "$RUNNER" -cp "$CLASSPATH" "${QUOTED_JAVA_OPTS[@]}" "$@" 1>&2 + echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2 echo -e "========================================\n" 1>&2 fi -exec "$RUNNER" -cp "$CLASSPATH" "${SPLIT_JAVA_OPTS[@]}" "$@" +# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself. +# Here we must parse the properties file for relevant "spark.driver.*" configs for launching +# the driver JVM itself. + +if [ -n "$SPARK_SUBMIT_CLIENT_MODE" ]; then + exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher \ + "$PROPERTIES_FILE" \ + "$RUNNER" \ + "$CLASSPATH" \ + "$SPARK_SUBMIT_LIBRARY_PATH" \ + "$JAVA_OPTS" \ + "$OUR_JAVA_MEM" \ + true \ + "$@" +else + exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" +fi + diff --git a/bin/spark-submit b/bin/spark-submit index 73555b29835f..c8a253a3f26f 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -20,9 +20,6 @@ export SPARK_HOME="$(cd `dirname $0`/..; pwd)" ORIG_ARGS=("$@") -# Load utility functions -. 
"$SPARK_HOME/bin/utils.sh" - while (($#)); do if [ "$1" = "--deploy-mode" ]; then DEPLOY_MODE=$2 @@ -44,68 +41,17 @@ DEPLOY_MODE=${DEPLOY_MODE:-"client"} DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} -unset DRIVER_EXTRA_JAVA_OPTIONS -unset EXECUTOR_EXTRA_JAVA_OPTIONS +# For client mode, the driver will be launched in the same JVM that launches +# SparkSubmit, so we to read the properties file for any class paths, library +# paths, java options and memory early on. Otherwise, it will be too late by +# the time the JVM has started. -# A few Spark configs must be parsed early on before launching the JVM: -# -# [spark.driver.extra*] -# These configs encode java options, class paths, and library paths -# needed to launch the JVM if we are running Spark in client mode -# -# [spark.*.extraJavaOptions] -# The escaped characters in these configs must be preserved for -# splitting the arguments in Java later. For these configs, we -# export the raw values as environment variables. -# -if [[ -f "$PROPERTIES_FILE" ]]; then - echo "Using properties file $PROPERTIES_FILE." 1>&2 - # Parse the properties file here only if these special configs exist - should_parse=$(grep -e "spark.driver.extra*\|spark.*.extraJavaOptions" "$PROPERTIES_FILE") - if [[ -n "$should_parse" ]]; then - # This exports the value of the given key into JAVA_PROPERTY_VALUE - parse_java_property "spark.driver.memory" - DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraLibraryPath" - DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraClassPath" - DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraJavaOptions" - DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.executor.extraJavaOptions" - EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" - # Export these for SparkSubmitArguments.scala to consume - if [[ -n "DRIVER_EXTRA_JAVA_OPTS" ]]; then - export DRIVER_EXTRA_JAVA_OPTS - fi - if [[ -n "EXECUTOR_EXTRA_JAVA_OPTS" ]]; then - export EXECUTOR_EXTRA_JAVA_OPTS - fi - fi -elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then - echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2 -fi - -# For client mode, the driver will be launched in the JVM that launches -# SparkSubmit, so we need to handle the class paths, java options, and -# memory preemptively in bash. Otherwise, it will be too late by the -# time the JVM has started. 
- -if [[ $DEPLOY_MODE == "client" ]]; then - if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then - export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" - fi - if [[ -n "$DRIVER_EXTRA_CLASSPATH" ]]; then - export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" - fi - if [[ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]]; then - export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" - fi - # Favor command line memory over config memory - DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"} - if [[ -n "$DRIVER_MEMORY" ]]; then +if [ "$DEPLOY_MODE" == "client" ]; then + if [ -n "$DRIVER_MEMORY" ]; then export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY fi + export PROPERTIES_FILE + export SPARK_SUBMIT_CLIENT_MODE=1 fi exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}" diff --git a/bin/test.conf b/bin/test.conf deleted file mode 100644 index 437d732d7565..000000000000 --- a/bin/test.conf +++ /dev/null @@ -1,45 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Spark properties file for testing -# -# The configs are separated into three categories, one for each delimiter that Java supports: -# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). -# The values of the configs are assumed to be identical across the categories. Changes in -# this file must be synced with "bin/test.sh" -# -------------------------------------------------------------------------------------------- - -# Space delimiter -spark.space.1 -Dstraw=berry -spark.space.2 -Dstraw="berry" -spark.space.3 -Dstraw="berry again" -spark.space.4 -Dstraw="berry \"quote" -spark.space.5 -Dstraw="berry \\backslash" -spark.space.6 -Dstraw="berry \"quotes\" and \\backslashes\\ " -spark.space.7 -Dstraw=berry -Dblue=berry -Dblack=berry -spark.space.8 -Dstraw="berry space" -Dblue="berry" -Dblack=berry -spark.space.9 -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " -spark.space.10 -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " - -# Equal sign delimiter -spark.equal.1=-Dstraw=berry -spark.equal.2=-Dstraw="berry" -spark.equal.3=-Dstraw="berry again" -spark.equal.4=-Dstraw="berry \"quote" -spark.equal.5=-Dstraw="berry \\backslash" -spark.equal.6=-Dstraw="berry \"quotes\" and \\backslashes\\ " -spark.equal.7=-Dstraw=berry -Dblue=berry -Dblack=berry -spark.equal.8=-Dstraw="berry space" -Dblue="berry" -Dblack=berry -spark.equal.9=-Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " -spark.equal.10 = -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " - -# Colon delimiter -spark.colon.1:-Dstraw=berry -spark.colon.2:-Dstraw="berry" -spark.colon.3:-Dstraw="berry again" -spark.colon.4:-Dstraw="berry \"quote" -spark.colon.5:-Dstraw="berry \\backslash" -spark.colon.6:-Dstraw="berry \"quotes\" and \\backslashes\\ " -spark.colon.7:-Dstraw=berry -Dblue=berry -Dblack=berry -spark.colon.8:-Dstraw="berry space" -Dblue="berry" -Dblack=berry -spark.colon.9:-Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " -spark.colon.10 : -Dstraw="berry space" -Dblue="berry \"quotes\"" -Dblack="berry \\backslashes\\ " - diff --git a/bin/utils.sh b/bin/utils.sh index c9ed977be143..cb7aa70dea19 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -20,47 +20,6 @@ # | Utility functions for launching Spark applications | # * 
---------------------------------------------------- * -# Parse the value of a config from a java properties file according to the specifications in -# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader), -# with the exception of the support for multi-line arguments. This accepts the name of the -# config as an argument, and expects the path of the property file to be found in -# PROPERTIES_FILE. The value is returned through JAVA_PROPERTY_VALUE. -function parse_java_property() { - JAVA_PROPERTY_VALUE=$(\ - grep "^[[:space:]]*$1" "$PROPERTIES_FILE" | \ - head -n 1 | \ - sed "s/^[[:space:]]*$1//g" | \ - sed "s/^[[:space:]]*[:=]\{0,1\}//g" | \ - sed "s/^[[:space:]]*//g" | \ - sed "s/[[:space:]]*$//g" - ) - export JAVA_PROPERTY_VALUE -} - -# Properly split java options, dealing with whitespace, double quotes and backslashes. -# This accepts a string and returns the resulting list through SPLIT_JAVA_OPTS. -# For security reasons, this is isolated in its own function. -function split_java_options() { - eval set -- "$1" - SPLIT_JAVA_OPTS=("$@") - export SPLIT_JAVA_OPTS -} - -# Put double quotes around each of the given java options that is a system property. -# This accepts a list and returns the quoted list through QUOTED_JAVA_OPTS -function quote_java_property() { - QUOTED_JAVA_OPTS=() - for opt in "$@"; do - is_system_property=$(echo "$opt" | grep -e "^-D") - if [[ -n "$is_system_property" ]]; then - QUOTED_JAVA_OPTS+=("\"$opt\"") - else - QUOTED_JAVA_OPTS+=("$opt") - fi - done - export QUOTED_JAVA_OPTS -} - # Gather all all spark-submit options into SUBMISSION_OPTS function gatherSparkSubmitOpts() { if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template index 80b72b0ca2d2..92adb4c9a575 100644 --- a/conf/spark-defaults.conf.template +++ b/conf/spark-defaults.conf.template @@ -1,10 +1,9 @@ # Default system properties included when running spark-submit. # This is useful for setting default environmental settings. -# Note that properties that span multiple lines are not supported. # Example: # spark.master spark://master:7077 # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer -# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dnumbers="one \"two\" three" +# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dkey=value -Dnumbers="one two three" diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala index 8acabc591cb2..3494577e4933 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala @@ -25,6 +25,8 @@ import org.apache.spark.util.{RedirectThread, Utils} /** * Wrapper of `bin/spark-class` that prepares the launch environment of the child JVM properly. + * This is currently only used for running Spark submit in client mode. The goal moving forward + * is to use this class for all use cases of `bin/spark-class`. 
*/ object SparkClassLauncher { @@ -61,7 +63,7 @@ object SparkClassLauncher { val javaRunner = args(1) val clClassPaths = args(2) val clLibraryPaths = args(3) - val clJavaOpts = args(4) + val clJavaOpts = Utils.splitCommandString(args(4)) val clJavaMemory = args(5) val clientMode = args(6) == "true" val mainClass = args(7) @@ -89,9 +91,8 @@ object SparkClassLauncher { val pathSeparator = sys.props("path.separator") val classPaths = clClassPaths + confClassPaths.map(pathSeparator + _).getOrElse("") val libraryPaths = clLibraryPaths + confLibraryPaths.map(pathSeparator + _).getOrElse("") - val javaOpts = Utils.splitCommandString(clJavaOpts) ++ - confJavaOpts.map(Utils.splitCommandString).getOrElse(Seq.empty) - val filteredJavaOpts = javaOpts.filterNot { opt => + val javaOpts = clJavaOpts ++ confJavaOpts.map(Utils.splitCommandString).getOrElse(Seq.empty) + val filteredJavaOpts = javaOpts.distinct.filterNot { opt => opt.startsWith("-Djava.library.path") || opt.startsWith("-Xms") || opt.startsWith("-Xmx") } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 28d4e0f65a56..d545f58c5da7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -76,15 +76,6 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) { } } } - // For spark.*.extraJavaOptions, we cannot rely on the Java properties loader because it - // un-escapes certain characters (" and \) needed to split the string into java options. - // For these configs, use the equivalent environment variables instead. - sys.env.get("DRIVER_EXTRA_JAVA_OPTS").foreach { opts => - defaultProperties("spark.driver.extraJavaOptions") = opts - } - sys.env.get("EXECUTOR_EXTRA_JAVA_OPTS").foreach { opts => - defaultProperties("spark.executor.extraJavaOptions") = opts - } defaultProperties } From c37e08db49200912214681685b1f7ed08b524403 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 18 Aug 2014 19:05:56 -0700 Subject: [PATCH 37/53] Revert a few more changes --- .rat-excludes | 1 - bin/pyspark | 3 +-- bin/spark-shell | 3 +-- bin/spark-submit | 6 +++--- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.rat-excludes b/.rat-excludes index 3c2451103e4b..eaefef1b0aa2 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -30,7 +30,6 @@ sorttable.js .*json .*data .*log -.*conf cloudpickle.py join.py SparkExprTyper.scala diff --git a/bin/pyspark b/bin/pyspark index 35a20ae4fcb2..01d42025c978 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -23,8 +23,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" -# Load utility functions -. "$FWDIR/bin/utils.sh" +source $FWDIR/bin/utils.sh SCALA_VERSION=2.10 diff --git a/bin/spark-shell b/bin/spark-shell index d3381179d8bc..e0228e1ffe5e 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -41,8 +41,7 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then usage fi -# Load utility functions -. 
"$FWDIR/bin/utils.sh" +source $FWDIR/bin/utils.sh SUBMIT_USAGE_FUNCTION=usage gatherSparkSubmitOpts "$@" diff --git a/bin/spark-submit b/bin/spark-submit index c8a253a3f26f..b26475e743a8 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -42,9 +42,9 @@ DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} # For client mode, the driver will be launched in the same JVM that launches -# SparkSubmit, so we to read the properties file for any class paths, library -# paths, java options and memory early on. Otherwise, it will be too late by -# the time the JVM has started. +# SparkSubmit, so we need to read the properties file for any extra class paths, +# library paths, java options and memory early on. Otherwise, it will be too +# late by the time the JVM has started. if [ "$DEPLOY_MODE" == "client" ]; then if [ -n "$DRIVER_MEMORY" ]; then From 3a8235dbc86522b98ccfb1f0868daaeeafc862dc Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 18 Aug 2014 19:17:16 -0700 Subject: [PATCH 38/53] Only parse the properties file if special configs exist This is to minimize the scope of influence exerted by this PR. In the future, we should make an effort to route all usages of spark-class through SparkClassLauncher so as to avoid duplicate logic in bash and Scala. --- bin/spark-class | 1 + bin/spark-submit | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 7a9203cfce47..f4b8af8a5d0f 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -160,6 +160,7 @@ fi # the driver JVM itself. if [ -n "$SPARK_SUBMIT_CLIENT_MODE" ]; then + # This is currently used only if the properties file actually consists of these special configs exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher \ "$PROPERTIES_FILE" \ "$RUNNER" \ diff --git a/bin/spark-submit b/bin/spark-submit index b26475e743a8..80e8a8ba78cd 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -42,16 +42,23 @@ DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} # For client mode, the driver will be launched in the same JVM that launches -# SparkSubmit, so we need to read the properties file for any extra class paths, -# library paths, java options and memory early on. Otherwise, it will be too -# late by the time the JVM has started. +# SparkSubmit, so we may need to read the properties file for any extra class +# paths, library paths, java options and memory early on. Otherwise, it will +# be too late by the time the JVM has started. 
if [ "$DEPLOY_MODE" == "client" ]; then if [ -n "$DRIVER_MEMORY" ]; then export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY fi - export PROPERTIES_FILE - export SPARK_SUBMIT_CLIENT_MODE=1 + # Parse the properties file only if the special configs exist + contains_special_configs=$( + grep -e "spark.driver.extra*\|spark.driver.memory" "$PROPERTIES_FILE" | \ + grep -v "^[[:space:]]*#" + ) + if [ -n "$contains_special_configs" ]; then + export PROPERTIES_FILE + export SPARK_SUBMIT_CLIENT_MODE=1 + fi fi exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}" From b71f52b7adaf5b3e67e17e31fa4fe11089494e1a Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 18 Aug 2014 19:21:45 -0700 Subject: [PATCH 39/53] Revert a few more changes (minor) --- bin/spark-shell | 1 - dev/run-tests | 6 ------ 2 files changed, 7 deletions(-) diff --git a/bin/spark-shell b/bin/spark-shell index e0228e1ffe5e..8b7ccd743955 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -42,7 +42,6 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then fi source $FWDIR/bin/utils.sh - SUBMIT_USAGE_FUNCTION=usage gatherSparkSubmitOpts "$@" diff --git a/dev/run-tests b/dev/run-tests index 0a10cfda2833..0e24515d1376 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -84,12 +84,6 @@ echo "Running Python style checks" echo "=========================================================================" dev/lint-python -echo "" -echo "=========================================================================" -echo "Running BASH tests" -echo "=========================================================================" -bin/run-tests - echo "" echo "=========================================================================" echo "Running Spark unit tests" From c84f5c8ac6c0d75930a3545bf98b25c6d6287186 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 18 Aug 2014 19:25:06 -0700 Subject: [PATCH 40/53] Remove debug print statement (minor) --- .../scala/org/apache/spark/deploy/SparkClassLauncher.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala index 3494577e4933..19250b3041ed 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala @@ -105,9 +105,6 @@ object SparkClassLauncher { Seq(s"-Xms$javaMemory", s"-Xmx$javaMemory") ++ Seq(mainClass) ++ args.slice(8, args.size) - - command.foreach(println) - val builder = new ProcessBuilder(command) val process = builder.start() new RedirectThread(System.in, process.getOutputStream, "redirect stdin").start() From 158f813e176b6d5b05bac35bfe7e76095ab693b2 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 18 Aug 2014 22:25:15 -0700 Subject: [PATCH 41/53] Remove "client mode" boolean argument ... and then make SparkClassLauncher private[spark]. --- bin/spark-class | 3 +- .../spark/deploy/SparkClassLauncher.scala | 36 +++++++------------ 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index f4b8af8a5d0f..2a11716942e1 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -160,7 +160,7 @@ fi # the driver JVM itself. 
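# For reference, the client-mode launch chain at this point in the series is,
# roughly (sketch only, helper class name as of this patch):
#   bin/spark-submit -> bin/spark-class -> SparkClassLauncher (a small helper
#   JVM) -> the driver JVM running org.apache.spark.deploy.SparkSubmit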
if [ -n "$SPARK_SUBMIT_CLIENT_MODE" ]; then - # This is currently used only if the properties file actually consists of these special configs + # This is currently used only if the properties file actually contains these special configs exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher \ "$PROPERTIES_FILE" \ "$RUNNER" \ @@ -168,7 +168,6 @@ if [ -n "$SPARK_SUBMIT_CLIENT_MODE" ]; then "$SPARK_SUBMIT_LIBRARY_PATH" \ "$JAVA_OPTS" \ "$OUR_JAVA_MEM" \ - true \ "$@" else exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala index 19250b3041ed..0ec1d166bafe 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala @@ -25,18 +25,18 @@ import org.apache.spark.util.{RedirectThread, Utils} /** * Wrapper of `bin/spark-class` that prepares the launch environment of the child JVM properly. - * This is currently only used for running Spark submit in client mode. The goal moving forward - * is to use this class for all use cases of `bin/spark-class`. */ -object SparkClassLauncher { +private[spark] object SparkClassLauncher { + + // TODO: This is currently only used for running Spark submit in client mode. + // The goal moving forward is to use this class for all use cases of `bin/spark-class`. /** - * Launch a Spark class with the given class paths, library paths, java options and memory. - * If we are launching an application through Spark submit in client mode, we must also - * take into account special `spark.driver.*` properties needed to start the driver JVM. + * Launch a Spark class with the given class paths, library paths, java options and memory, + * taking into account special `spark.driver.*` properties needed to start the driver JVM. */ def main(args: Array[String]): Unit = { - if (args.size < 8) { + if (args.size < 7) { System.err.println( """ |Usage: org.apache.spark.deploy.SparkClassLauncher @@ -47,14 +47,13 @@ object SparkClassLauncher { | [java library paths] - library paths to pass to the child JVM | [java opts] - java options to pass to the child JVM | [java memory] - memory used to launch the child JVM - | [client mode] - whether the child JVM will run the Spark driver | [main class] - main class to run in the child JVM |
- arguments passed to this main class | |Example: | org.apache.spark.deploy.SparkClassLauncher.SparkClassLauncher | conf/spark-defaults.conf java /classpath1:/classpath2 /librarypath1:/librarypath2 - | "-XX:-UseParallelGC -Dsome=property" 5g true org.apache.spark.deploy.SparkSubmit + | "-XX:-UseParallelGC -Dsome=property" 5g org.apache.spark.deploy.SparkSubmit | --master local --class org.apache.spark.examples.SparkPi 10 """.stripMargin) System.exit(1) @@ -65,29 +64,18 @@ object SparkClassLauncher { val clLibraryPaths = args(3) val clJavaOpts = Utils.splitCommandString(args(4)) val clJavaMemory = args(5) - val clientMode = args(6) == "true" - val mainClass = args(7) + val mainClass = args(6) // In client deploy mode, parse the properties file for certain `spark.driver.*` configs. // These configs encode java options, class paths, and library paths needed to launch the JVM. - val properties = - if (clientMode) { - SparkSubmitArguments.getPropertiesFromFile(new File(propertiesFile)).toMap - } else { - Map[String, String]() - } + val properties = SparkSubmitArguments.getPropertiesFromFile(new File(propertiesFile)).toMap val confDriverMemory = properties.get("spark.driver.memory") val confClassPaths = properties.get("spark.driver.extraClassPath") val confLibraryPaths = properties.get("spark.driver.extraLibraryPath") val confJavaOpts = properties.get("spark.driver.extraJavaOptions") // Merge relevant command line values with the config equivalents, if any - val javaMemory = - if (clientMode) { - confDriverMemory.getOrElse(clJavaMemory) - } else { - clJavaMemory - } + val javaMemory = confDriverMemory.getOrElse(clJavaMemory) val pathSeparator = sys.props("path.separator") val classPaths = clClassPaths + confClassPaths.map(pathSeparator + _).getOrElse("") val libraryPaths = clLibraryPaths + confLibraryPaths.map(pathSeparator + _).getOrElse("") @@ -104,7 +92,7 @@ object SparkClassLauncher { filteredJavaOpts ++ Seq(s"-Xms$javaMemory", s"-Xmx$javaMemory") ++ Seq(mainClass) ++ - args.slice(8, args.size) + args.slice(7, args.size) val builder = new ProcessBuilder(command) val process = builder.start() new RedirectThread(System.in, process.getOutputStream, "redirect stdin").start() From a91ea19032fca9ebacb281ebe2ee6e820accb2b8 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 01:58:51 -0700 Subject: [PATCH 42/53] Fix precedence of library paths, classpath, java opts and memory This was previously broken because of the way we pass command line arguments. As of this commit, the ordering becomes: SPARK_SUBMIT_DRIVER_MEMORY > spark.driver.memory > SPARK_DRIVER_MEMORY SPARK_SUBMIT_CLASSPATH > spark.driver.extraClassPath SPARK_SUBMIT_LIBRARY_PATH > spark.driver.extraLibraryPath SPARK_SUBMIT_JAVA_OPTS > spark.driver.extraJavaOpts We achieve this by passing existing environment variables to SparkClassLauncher directly. --- bin/spark-class | 52 +++---- bin/spark-submit | 26 ++-- .../spark/deploy/SparkClassLauncher.scala | 136 ++++++++++-------- 3 files changed, 117 insertions(+), 97 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 2a11716942e1..2d1921474ed7 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -17,6 +17,8 @@ # limitations under the License. # +# NOTE: Any changes to this file must be reflected in SparkClassLauncher.scala! 
+ cygwin=false case "`uname`" in CYGWIN*) cygwin=true;; @@ -73,13 +75,16 @@ case "$1" in OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM} ;; - # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS - 'org.apache.spark.deploy.SparkSubmit') - OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS" + # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_JAVA_OPTS + SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY. + 'org.apache.spark.deploy.SparkSubmit') + OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_JAVA_OPTS" + OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" fi - OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} + if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then + OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY" + fi ;; *) @@ -102,7 +107,6 @@ fi # Set JAVA_OPTS to be able to load native libraries and to set heap size JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS" -JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM" # Load extra JAVA_OPTS from conf/java-opts, if it exists if [ -e "$FWDIR/conf/java-opts" ] ; then @@ -149,27 +153,27 @@ if $cygwin; then fi export CLASSPATH -if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then - echo -n "Spark Command: " 1>&2 - echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2 - echo -e "========================================\n" 1>&2 -fi - # In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself. -# Here we must parse the properties file for relevant "spark.driver.*" configs for launching -# the driver JVM itself. - -if [ -n "$SPARK_SUBMIT_CLIENT_MODE" ]; then - # This is currently used only if the properties file actually contains these special configs - exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher \ - "$PROPERTIES_FILE" \ - "$RUNNER" \ - "$CLASSPATH" \ - "$SPARK_SUBMIT_LIBRARY_PATH" \ - "$JAVA_OPTS" \ - "$OUR_JAVA_MEM" \ - "$@" +# Here we must parse the properties file for relevant "spark.driver.*" configs before launching +# the driver JVM itself. Instead of handling this complexity in BASH, we launch a separate JVM +# to prepare the launch environment of this driver JVM. + +if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then + # This is used only if the properties file actually contains these special configs + # Export the environment variables needed by SparkClassLauncher + export RUNNER + export CLASSPATH + export JAVA_OPTS + export OUR_JAVA_MEM + shift + exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher "$@" else + JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM" + if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then + echo -n "Spark Command: " 1>&2 + echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2 + echo -e "========================================\n" 1>&2 + fi exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" fi diff --git a/bin/spark-submit b/bin/spark-submit index 80e8a8ba78cd..5b7114bb2b20 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -17,47 +17,45 @@ # limitations under the License. # +# NOTE: Any changes in this file must be reflected in SparkClassLauncher.scala! 
+ export SPARK_HOME="$(cd `dirname $0`/..; pwd)" ORIG_ARGS=("$@") while (($#)); do if [ "$1" = "--deploy-mode" ]; then - DEPLOY_MODE=$2 - elif [ "$1" = "--driver-memory" ]; then - DRIVER_MEMORY=$2 + SPARK_SUBMIT_DEPLOY_MODE=$2 elif [ "$1" = "--properties-file" ]; then - PROPERTIES_FILE=$2 + SPARK_SUBMIT_PROPERTIES_FILE=$2 + elif [ "$1" = "--driver-memory" ]; then + export SPARK_SUBMIT_DRIVER_MEMORY=$2 elif [ "$1" = "--driver-library-path" ]; then export SPARK_SUBMIT_LIBRARY_PATH=$2 elif [ "$1" = "--driver-class-path" ]; then export SPARK_SUBMIT_CLASSPATH=$2 elif [ "$1" = "--driver-java-options" ]; then - export SPARK_SUBMIT_OPTS=$2 + export SPARK_SUBMIT_JAVA_OPTS=$2 fi shift done -DEPLOY_MODE=${DEPLOY_MODE:-"client"} DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" -PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} +export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"} +export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} # For client mode, the driver will be launched in the same JVM that launches # SparkSubmit, so we may need to read the properties file for any extra class # paths, library paths, java options and memory early on. Otherwise, it will # be too late by the time the JVM has started. -if [ "$DEPLOY_MODE" == "client" ]; then - if [ -n "$DRIVER_MEMORY" ]; then - export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY - fi +if [ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" ]; then # Parse the properties file only if the special configs exist contains_special_configs=$( - grep -e "spark.driver.extra*\|spark.driver.memory" "$PROPERTIES_FILE" | \ + grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \ grep -v "^[[:space:]]*#" ) if [ -n "$contains_special_configs" ]; then - export PROPERTIES_FILE - export SPARK_SUBMIT_CLIENT_MODE=1 + export SPARK_SUBMIT_BOOTSTRAP_DRIVER=1 fi fi diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala index 0ec1d166bafe..5ebc97339195 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala @@ -24,76 +24,94 @@ import scala.collection.JavaConversions._ import org.apache.spark.util.{RedirectThread, Utils} /** - * Wrapper of `bin/spark-class` that prepares the launch environment of the child JVM properly. + * Launch an application through Spark submit in client mode with the appropriate classpath, + * library paths, java options and memory. These properties of the JVM must be set before the + * driver JVM is launched. The sole purpose of this class is to avoid handling the complexity + * of parsing the properties file for such relevant configs in BASH. + * + * Usage: org.apache.spark.deploy.SparkClassLauncher */ private[spark] object SparkClassLauncher { - // TODO: This is currently only used for running Spark submit in client mode. - // The goal moving forward is to use this class for all use cases of `bin/spark-class`. + // Note: This class depends on the behavior of `bin/spark-class` and `bin/spark-submit`. + // Any changes made there must be reflected in this file. - /** - * Launch a Spark class with the given class paths, library paths, java options and memory, - * taking into account special `spark.driver.*` properties needed to start the driver JVM. 
- */ def main(args: Array[String]): Unit = { - if (args.size < 7) { - System.err.println( - """ - |Usage: org.apache.spark.deploy.SparkClassLauncher - | - | [properties file] - path to your Spark properties file - | [java runner] - command to launch the child JVM - | [java class paths] - class paths to pass to the child JVM - | [java library paths] - library paths to pass to the child JVM - | [java opts] - java options to pass to the child JVM - | [java memory] - memory used to launch the child JVM - | [main class] - main class to run in the child JVM - |
- arguments passed to this main class - | - |Example: - | org.apache.spark.deploy.SparkClassLauncher.SparkClassLauncher - | conf/spark-defaults.conf java /classpath1:/classpath2 /librarypath1:/librarypath2 - | "-XX:-UseParallelGC -Dsome=property" 5g org.apache.spark.deploy.SparkSubmit - | --master local --class org.apache.spark.examples.SparkPi 10 - """.stripMargin) - System.exit(1) - } - val propertiesFile = args(0) - val javaRunner = args(1) - val clClassPaths = args(2) - val clLibraryPaths = args(3) - val clJavaOpts = Utils.splitCommandString(args(4)) - val clJavaMemory = args(5) - val mainClass = args(6) + val submitArgs = args + val runner = sys.env("RUNNER") + val classpath = sys.env("CLASSPATH") + val javaOpts = sys.env("JAVA_OPTS") + val defaultDriverMemory = sys.env("OUR_JAVA_MEM") + + // Spark submit specific environment variables + val deployMode = sys.env("SPARK_SUBMIT_DEPLOY_MODE") + val propertiesFile = sys.env("SPARK_SUBMIT_PROPERTIES_FILE") + val bootstrapDriver = sys.env("SPARK_SUBMIT_BOOTSTRAP_DRIVER") + val submitDriverMemory = sys.env.get("SPARK_SUBMIT_DRIVER_MEMORY") + val submitLibraryPath = sys.env.get("SPARK_SUBMIT_LIBRARY_PATH") + val submitClasspath = sys.env.get("SPARK_SUBMIT_CLASSPATH") + val submitJavaOpts = sys.env.get("SPARK_SUBMIT_JAVA_OPTS") + + assume(runner != null, "RUNNER must be set") + assume(classpath != null, "CLASSPATH must be set") + assume(javaOpts != null, "JAVA_OPTS must be set") + assume(defaultDriverMemory != null, "OUR_JAVA_MEM must be set") + assume(deployMode == "client", "SPARK_SUBMIT_DEPLOY_MODE must be \"client\"!") + assume(propertiesFile != null, "SPARK_SUBMIT_PROPERTIES_FILE must be set") + assume(bootstrapDriver != null, "SPARK_SUBMIT_BOOTSTRAP_DRIVER must be set!") - // In client deploy mode, parse the properties file for certain `spark.driver.*` configs. - // These configs encode java options, class paths, and library paths needed to launch the JVM. + // Parse the properties file for the equivalent spark.driver.* configs val properties = SparkSubmitArguments.getPropertiesFromFile(new File(propertiesFile)).toMap - val confDriverMemory = properties.get("spark.driver.memory") - val confClassPaths = properties.get("spark.driver.extraClassPath") - val confLibraryPaths = properties.get("spark.driver.extraLibraryPath") - val confJavaOpts = properties.get("spark.driver.extraJavaOptions") + val confDriverMemory = properties.get("spark.driver.memory").getOrElse(defaultDriverMemory) + val confLibraryPath = properties.get("spark.driver.extraLibraryPath").getOrElse("") + val confClasspath = properties.get("spark.driver.extraClassPath").getOrElse("") + val confJavaOpts = properties.get("spark.driver.extraJavaOptions").getOrElse("") - // Merge relevant command line values with the config equivalents, if any - val javaMemory = confDriverMemory.getOrElse(clJavaMemory) - val pathSeparator = sys.props("path.separator") - val classPaths = clClassPaths + confClassPaths.map(pathSeparator + _).getOrElse("") - val libraryPaths = clLibraryPaths + confLibraryPaths.map(pathSeparator + _).getOrElse("") - val javaOpts = clJavaOpts ++ confJavaOpts.map(Utils.splitCommandString).getOrElse(Seq.empty) - val filteredJavaOpts = javaOpts.distinct.filterNot { opt => - opt.startsWith("-Djava.library.path") || opt.startsWith("-Xms") || opt.startsWith("-Xmx") - } + // Favor Spark submit arguments over the equivalent configs in the properties file. 
+ // Note that we do not actually use the Spark submit values for library path, classpath, + // and java opts here, because we have already captured them in BASH. + val newDriverMemory = submitDriverMemory.getOrElse(confDriverMemory) + val newLibraryPath = + if (submitLibraryPath.isDefined) { + // SPARK_SUBMIT_LIBRARY_PATH is already captured in JAVA_OPTS + "" + } else { + "-Djava.library.path=" + confLibraryPath + } + val newClasspath = + if (submitClasspath.isDefined) { + // SPARK_SUBMIT_CLASSPATH is already captured in CLASSPATH + classpath + } else { + classpath + sys.props("path.separator") + confClasspath + } + val newJavaOpts = + if (submitJavaOpts.isDefined) { + // SPARK_SUBMIT_JAVA_OPTS is already captured in JAVA_OPTS + javaOpts + } else { + javaOpts + " " + confJavaOpts + } // Build up command val command: Seq[String] = - Seq(javaRunner) ++ - { if (classPaths.nonEmpty) Seq("-cp", classPaths) else Seq.empty } ++ - { if (libraryPaths.nonEmpty) Seq(s"-Djava.library.path=$libraryPaths") else Seq.empty } ++ - filteredJavaOpts ++ - Seq(s"-Xms$javaMemory", s"-Xmx$javaMemory") ++ - Seq(mainClass) ++ - args.slice(7, args.size) - val builder = new ProcessBuilder(command) + Seq(runner) ++ + Seq("-cp", newClasspath) ++ + Seq(newLibraryPath) ++ + Utils.splitCommandString(newJavaOpts) ++ + Seq(s"-Xms$newDriverMemory", s"-Xmx$newDriverMemory") ++ + Seq("org.apache.spark.deploy.SparkSubmit") ++ + submitArgs + + // Print the launch command. This follows closely the format used in `bin/spark-class`. + if (sys.env.contains("SPARK_PRINT_LAUNCH_COMMAND")) { + System.err.print("Spark Command: ") + System.err.println(command.mkString(" ")) + System.err.println("========================================\n") + } + + val filteredCommand = command.filter(_.nonEmpty) + val builder = new ProcessBuilder(filteredCommand) val process = builder.start() new RedirectThread(System.in, process.getOutputStream, "redirect stdin").start() new RedirectThread(process.getInputStream, System.out, "redirect stdout").start() From 1ea6bbe5ed3c1c2e4d78ebb4712c550c2ce584e7 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 02:03:56 -0700 Subject: [PATCH 43/53] SparkClassLauncher -> SparkSubmitDriverBootstrapper --- bin/spark-class | 6 +++--- ...ssLauncher.scala => SparkSubmitDriverBootstrapper.scala} | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) rename core/src/main/scala/org/apache/spark/deploy/{SparkClassLauncher.scala => SparkSubmitDriverBootstrapper.scala} (97%) diff --git a/bin/spark-class b/bin/spark-class index 2d1921474ed7..c7745e579d3f 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -17,7 +17,7 @@ # limitations under the License. # -# NOTE: Any changes to this file must be reflected in SparkClassLauncher.scala! +# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala! 
cygwin=false case "`uname`" in @@ -160,13 +160,13 @@ export CLASSPATH if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then # This is used only if the properties file actually contains these special configs - # Export the environment variables needed by SparkClassLauncher + # Export the environment variables needed by SparkSubmitDriverBootstrapper export RUNNER export CLASSPATH export JAVA_OPTS export OUR_JAVA_MEM shift - exec "$RUNNER" org.apache.spark.deploy.SparkClassLauncher "$@" + exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@" else JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM" if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala similarity index 97% rename from core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala rename to core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index 5ebc97339195..1f82787939ac 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkClassLauncher.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -29,9 +29,9 @@ import org.apache.spark.util.{RedirectThread, Utils} * driver JVM is launched. The sole purpose of this class is to avoid handling the complexity * of parsing the properties file for such relevant configs in BASH. * - * Usage: org.apache.spark.deploy.SparkClassLauncher + * Usage: org.apache.spark.deploy.SparkSubmitDriverBootstrapper */ -private[spark] object SparkClassLauncher { +private[spark] object SparkSubmitDriverBootstrapper { // Note: This class depends on the behavior of `bin/spark-class` and `bin/spark-submit`. // Any changes made there must be reflected in this file. From 19464ada6d69fd233cbee455bd3af1845a4e2d9d Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 10:47:31 -0700 Subject: [PATCH 44/53] SPARK_SUBMIT_JAVA_OPTS -> SPARK_SUBMIT_OPTS --- bin/spark-class | 5 +++-- bin/spark-submit | 2 +- .../apache/spark/deploy/SparkSubmitDriverBootstrapper.scala | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index c7745e579d3f..49c6bb4fdf75 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -75,9 +75,10 @@ case "$1" in OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM} ;; - # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_JAVA_OPTS + SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY. + # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS + + # SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY. 
'org.apache.spark.deploy.SparkSubmit') - OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_JAVA_OPTS" + OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS" OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" diff --git a/bin/spark-submit b/bin/spark-submit index 5b7114bb2b20..faf50e8df198 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -34,7 +34,7 @@ while (($#)); do elif [ "$1" = "--driver-class-path" ]; then export SPARK_SUBMIT_CLASSPATH=$2 elif [ "$1" = "--driver-java-options" ]; then - export SPARK_SUBMIT_JAVA_OPTS=$2 + export SPARK_SUBMIT_OPTS=$2 fi shift done diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index 1f82787939ac..c4b562fc6dfb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -50,7 +50,7 @@ private[spark] object SparkSubmitDriverBootstrapper { val submitDriverMemory = sys.env.get("SPARK_SUBMIT_DRIVER_MEMORY") val submitLibraryPath = sys.env.get("SPARK_SUBMIT_LIBRARY_PATH") val submitClasspath = sys.env.get("SPARK_SUBMIT_CLASSPATH") - val submitJavaOpts = sys.env.get("SPARK_SUBMIT_JAVA_OPTS") + val submitJavaOpts = sys.env.get("SPARK_SUBMIT_OPTS") assume(runner != null, "RUNNER must be set") assume(classpath != null, "CLASSPATH must be set") @@ -87,7 +87,7 @@ private[spark] object SparkSubmitDriverBootstrapper { } val newJavaOpts = if (submitJavaOpts.isDefined) { - // SPARK_SUBMIT_JAVA_OPTS is already captured in JAVA_OPTS + // SPARK_SUBMIT_OPTS is already captured in JAVA_OPTS javaOpts } else { javaOpts + " " + confJavaOpts From 8867a091413cbb5d3f5ff2dc8f959a56c5ee7dc4 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 10:52:07 -0700 Subject: [PATCH 45/53] A few more naming things (minor) --- bin/spark-class | 2 +- .../apache/spark/deploy/SparkSubmitDriverBootstrapper.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 49c6bb4fdf75..9ce322cab656 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -156,7 +156,7 @@ export CLASSPATH # In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself. # Here we must parse the properties file for relevant "spark.driver.*" configs before launching -# the driver JVM itself. Instead of handling this complexity in BASH, we launch a separate JVM +# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM # to prepare the launch environment of this driver JVM. if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index c4b562fc6dfb..8422406b8f07 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -27,7 +27,7 @@ import org.apache.spark.util.{RedirectThread, Utils} * Launch an application through Spark submit in client mode with the appropriate classpath, * library paths, java options and memory. These properties of the JVM must be set before the * driver JVM is launched. 
The sole purpose of this class is to avoid handling the complexity - * of parsing the properties file for such relevant configs in BASH. + * of parsing the properties file for such relevant configs in Bash. * * Usage: org.apache.spark.deploy.SparkSubmitDriverBootstrapper */ @@ -69,7 +69,7 @@ private[spark] object SparkSubmitDriverBootstrapper { // Favor Spark submit arguments over the equivalent configs in the properties file. // Note that we do not actually use the Spark submit values for library path, classpath, - // and java opts here, because we have already captured them in BASH. + // and Java opts here, because we have already captured them in Bash. val newDriverMemory = submitDriverMemory.getOrElse(confDriverMemory) val newLibraryPath = if (submitLibraryPath.isDefined) { From 9ba37e2b17a78ebddded6f4fbd30297b8365f57c Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 11:03:19 -0700 Subject: [PATCH 46/53] Don't barf when the properties file does not exist --- bin/spark-submit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/spark-submit b/bin/spark-submit index faf50e8df198..fd8f4b8e80de 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -48,7 +48,7 @@ export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PR # paths, library paths, java options and memory early on. Otherwise, it will # be too late by the time the JVM has started. -if [ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" ]; then +if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FILE" ]]; then # Parse the properties file only if the special configs exist contains_special_configs=$( grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \ From a78cb2612d67fe1a2d0f3fe64bb94479a63bf969 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 11:10:07 -0700 Subject: [PATCH 47/53] Revert a few changes in utils.sh (minor) --- bin/utils.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bin/utils.sh b/bin/utils.sh index cb7aa70dea19..0804b1ed9f23 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash + # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -16,12 +17,9 @@ # limitations under the License. # -# * ---------------------------------------------------- * -# | Utility functions for launching Spark applications | -# * ---------------------------------------------------- * - # Gather all all spark-submit options into SUBMISSION_OPTS function gatherSparkSubmitOpts() { + if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then echo "Function for printing usage of $0 is not set." 1>&2 echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2 @@ -59,4 +57,3 @@ function gatherSparkSubmitOpts() { export SUBMISSION_OPTS export APPLICATION_OPTS } - From d0f20db0e81551bb5f7447fe5491cc2ae5817421 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 11:26:45 -0700 Subject: [PATCH 48/53] Don't pass empty library paths, classpath, java opts etc. This clarifies the default order of the different ways to set the configs. In particular, we no longer set an empty -Djava.library.path if no library paths are given. 
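A minimal, self-contained sketch of the resulting fallback pattern (the object
name and all values below are made up for illustration; the real bootstrapper
reads them from Spark submit's exported environment variables and from the
properties file):

object PrecedenceSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical sources, highest priority first
    val submitDriverMemory: Option[String] = None        // --driver-memory was not given
    val confDriverMemory: Option[String] = Some("5g")    // spark.driver.memory from the conf file
    val defaultDriverMemory = "512m"                     // OUR_JAVA_MEM inherited from spark-class

    // The first defined source wins; undefined sources simply fall through
    val newDriverMemory = submitDriverMemory
      .orElse(confDriverMemory)
      .getOrElse(defaultDriverMemory)
    println(s"-Xms$newDriverMemory -Xmx$newDriverMemory")  // prints: -Xms5g -Xmx5g

    // A flag is emitted only when some source defines it, so an empty
    // -Djava.library.path= never reaches the child JVM
    val confLibraryPath: Option[String] = None
    val libraryPathFlag = confLibraryPath.map("-Djava.library.path=" + _).getOrElse("")
    println(libraryPathFlag.isEmpty)                       // true: the flag is omitted entirely
  }
}

The same chain is applied to the classpath and java options in the diff below,
and the assembled command is filtered for empty strings before the child
process is built.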
--- conf/spark-defaults.conf.template | 1 + .../SparkSubmitDriverBootstrapper.scala | 24 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template index 92adb4c9a575..94427029b94d 100644 --- a/conf/spark-defaults.conf.template +++ b/conf/spark-defaults.conf.template @@ -6,4 +6,5 @@ # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g # spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dkey=value -Dnumbers="one two three" diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index 8422406b8f07..5d7cabd6ebd2 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -58,39 +58,45 @@ private[spark] object SparkSubmitDriverBootstrapper { assume(defaultDriverMemory != null, "OUR_JAVA_MEM must be set") assume(deployMode == "client", "SPARK_SUBMIT_DEPLOY_MODE must be \"client\"!") assume(propertiesFile != null, "SPARK_SUBMIT_PROPERTIES_FILE must be set") - assume(bootstrapDriver != null, "SPARK_SUBMIT_BOOTSTRAP_DRIVER must be set!") + assume(bootstrapDriver != null, "SPARK_SUBMIT_BOOTSTRAP_DRIVER must be set") // Parse the properties file for the equivalent spark.driver.* configs val properties = SparkSubmitArguments.getPropertiesFromFile(new File(propertiesFile)).toMap - val confDriverMemory = properties.get("spark.driver.memory").getOrElse(defaultDriverMemory) - val confLibraryPath = properties.get("spark.driver.extraLibraryPath").getOrElse("") - val confClasspath = properties.get("spark.driver.extraClassPath").getOrElse("") - val confJavaOpts = properties.get("spark.driver.extraJavaOptions").getOrElse("") + val confDriverMemory = properties.get("spark.driver.memory") + val confLibraryPath = properties.get("spark.driver.extraLibraryPath") + val confClasspath = properties.get("spark.driver.extraClassPath") + val confJavaOpts = properties.get("spark.driver.extraJavaOptions") // Favor Spark submit arguments over the equivalent configs in the properties file. // Note that we do not actually use the Spark submit values for library path, classpath, // and Java opts here, because we have already captured them in Bash. 
- val newDriverMemory = submitDriverMemory.getOrElse(confDriverMemory) + + val newDriverMemory = submitDriverMemory + .orElse(confDriverMemory) + .getOrElse(defaultDriverMemory) + val newLibraryPath = if (submitLibraryPath.isDefined) { // SPARK_SUBMIT_LIBRARY_PATH is already captured in JAVA_OPTS "" } else { - "-Djava.library.path=" + confLibraryPath + confLibraryPath.map("-Djava.library.path=" + _).getOrElse("") } + val newClasspath = if (submitClasspath.isDefined) { // SPARK_SUBMIT_CLASSPATH is already captured in CLASSPATH classpath } else { - classpath + sys.props("path.separator") + confClasspath + classpath + confClasspath.map(sys.props("path.separator") + _).getOrElse("") } + val newJavaOpts = if (submitJavaOpts.isDefined) { // SPARK_SUBMIT_OPTS is already captured in JAVA_OPTS javaOpts } else { - javaOpts + " " + confJavaOpts + javaOpts + confJavaOpts.map(" " + _).getOrElse("") } // Build up command From 9a778f66d812886b3322b32ae92fccf9d8415657 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 19 Aug 2014 23:57:23 -0700 Subject: [PATCH 49/53] Fix PySpark: actually kill driver on termination We use the stdin broken pipe as a signal that the parent process that launched the SparkSubmitDriverBootstrapper JVM has exited. This is very similar to what Py4J's JavaGateway does (see the parameter "--die-on-broken-pipe"). This allows both JVMs to actually terminate after the application has finished. This was especially relevant for the PySpark shell, where Spark submit itself is launched as a python subprocess and the driver was never actually killed even after the shell had exited. --- .../SparkSubmitDriverBootstrapper.scala | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index 5d7cabd6ebd2..f4c8eeb581b3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -29,7 +29,7 @@ import org.apache.spark.util.{RedirectThread, Utils} * driver JVM is launched. The sole purpose of this class is to avoid handling the complexity * of parsing the properties file for such relevant configs in Bash. 
 *
- * Usage: org.apache.spark.deploy.SparkSubmitDriverBootstrapper
+ * Usage: org.apache.spark.deploy.SparkSubmitDriverBootstrapper <submit args>
 */
 private[spark] object SparkSubmitDriverBootstrapper {

@@ -116,13 +116,23 @@ private[spark] object SparkSubmitDriverBootstrapper {
       System.err.println("========================================\n")
     }

+    // Start the driver JVM
     val filteredCommand = command.filter(_.nonEmpty)
     val builder = new ProcessBuilder(filteredCommand)
     val process = builder.start()
-    new RedirectThread(System.in, process.getOutputStream, "redirect stdin").start()
-    new RedirectThread(process.getInputStream, System.out, "redirect stdout").start()
-    new RedirectThread(process.getErrorStream, System.err, "redirect stderr").start()
-    System.exit(process.waitFor())
+
+    // Redirect stdin, stdout, and stderr to/from the child JVM
+    val stdinThread = new RedirectThread(System.in, process.getOutputStream, "redirect stdin")
+    val stdoutThread = new RedirectThread(process.getInputStream, System.out, "redirect stdout")
+    val stderrThread = new RedirectThread(process.getErrorStream, System.err, "redirect stderr")
+    stdinThread.start()
+    stdoutThread.start()
+    stderrThread.start()
+
+    // Terminate on broken pipe, which signals that the parent process has exited. This is
+    // important for the PySpark shell, where Spark submit itself is a Python subprocess.
+    stdinThread.join()
+    process.destroy()
   }
 }

From 51aeb018d00c18af239405328810b686430fe8a7 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Wed, 20 Aug 2014 00:11:03 -0700
Subject: [PATCH 50/53] Filter out JVM memory in Scala rather than Bash (minor)

---
 bin/spark-class                                              | 2 +-
 .../apache/spark/deploy/SparkSubmitDriverBootstrapper.scala  | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/bin/spark-class b/bin/spark-class
index 9ce322cab656..5f047b3a777f 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -108,6 +108,7 @@ fi

 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
+JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
@@ -169,7 +170,6 @@ if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
   shift
   exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
 else
-  JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
   if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
     echo -n "Spark Command: " 1>&2
     echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
index f4c8eeb581b3..bd6767bc7882 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
@@ -99,12 +99,16 @@ private[spark] object SparkSubmitDriverBootstrapper {
       javaOpts + confJavaOpts.map(" " + _).getOrElse("")
     }

+    val filteredJavaOpts = Utils.splitCommandString(newJavaOpts)
+      .filterNot(_.startsWith("-Xms"))
+      .filterNot(_.startsWith("-Xmx"))
+
     // Build up command
     val command: Seq[String] =
       Seq(runner) ++
       Seq("-cp", newClasspath) ++
       Seq(newLibraryPath) ++
-      Utils.splitCommandString(newJavaOpts) ++
+      filteredJavaOpts ++
       Seq(s"-Xms$newDriverMemory", s"-Xmx$newDriverMemory") ++
       Seq("org.apache.spark.deploy.SparkSubmit") ++
       submitArgs

From ff347289247b17d4bfb524e83e079e9efa8a4d6e Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Wed, 20 Aug 2014 00:15:40 -0700
Subject: [PATCH 51/53] Minor comments

---
 bin/spark-class  | 1 +
 bin/spark-submit | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/spark-class b/bin/spark-class
index 5f047b3a777f..629843333f2c 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -170,6 +170,7 @@ if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
   shift
   exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
 else
+  # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
   if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
     echo -n "Spark Command: " 1>&2
     echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2

diff --git a/bin/spark-submit b/bin/spark-submit
index fd8f4b8e80de..32c911cd0438 100755
--- a/bin/spark-submit
+++ b/bin/spark-submit
@@ -46,7 +46,7 @@ export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PR
 # For client mode, the driver will be launched in the same JVM that launches
 # SparkSubmit, so we may need to read the properties file for any extra class
 # paths, library paths, java options and memory early on. Otherwise, it will
-# be too late by the time the JVM has started.
+# be too late by the time the driver JVM has started.

 if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FILE" ]]; then
   # Parse the properties file only if the special configs exist

From 08fd7880337289f796151f6af3100bcc44130386 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Wed, 20 Aug 2014 00:22:56 -0700
Subject: [PATCH 52/53] Warn against external usages of SparkSubmitDriverBootstrapper

---
 bin/spark-class                                       | 3 ++-
 .../spark/deploy/SparkSubmitDriverBootstrapper.scala  | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/bin/spark-class b/bin/spark-class
index 629843333f2c..1071eb9a953d 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -167,7 +167,8 @@ if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
   export CLASSPATH
   export JAVA_OPTS
   export OUR_JAVA_MEM
-  shift
+  export SPARK_CLASS=1
+  shift # Ignore main class and use our own
   exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
 else
   # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
index bd6767bc7882..af607e6a4a06 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala
@@ -37,6 +37,13 @@ private[spark] object SparkSubmitDriverBootstrapper {
   // Any changes made there must be reflected in this file.
def main(args: Array[String]): Unit = { + + // This should be called only from `bin/spark-class` + if (!sys.env.contains("SPARK_CLASS")) { + System.err.println("SparkSubmitDriverBootstrapper must be called from `bin/spark-class`!") + System.exit(1) + } + val submitArgs = args val runner = sys.env("RUNNER") val classpath = sys.env("CLASSPATH") From bed4bdff74e941f791080e9407fc5295af10f677 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 20 Aug 2014 13:53:23 -0700 Subject: [PATCH 53/53] Change a few comments / messages (minor) --- bin/spark-class | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 1071eb9a953d..22acf92288b3 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -41,7 +41,7 @@ fi if [ -n "$SPARK_MEM" ]; then echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2 - echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2 + echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2 fi # Use SPARK_MEM or 512m as the default memory, to be overridden by specific options @@ -168,7 +168,7 @@ if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then export JAVA_OPTS export OUR_JAVA_MEM export SPARK_CLASS=1 - shift # Ignore main class and use our own + shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@" else # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
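
The series above leans on a few small, reusable techniques that are easy to miss in the diffs. First, the bootstrapper's Option-based refactor implements a three-level precedence rule for driver settings in client mode: an explicit spark-submit argument beats the spark-defaults.conf entry, which beats the hard-coded default. The Scala sketch below distills that rule; it is illustrative only. The object name and file path are made up, args.headOption stands in for --driver-memory, and java.util.Properties replaces SparkSubmitArguments.getPropertiesFromFile (Properties happens to accept both "key value" and "key=value" lines, which covers the format used here).

import java.io.{File, FileInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets
import java.util.Properties
import scala.collection.JavaConverters._

// Hypothetical demo object, not part of Spark
object DriverConfigPrecedenceSketch {
  // Load a spark-defaults.conf-style file into an immutable Map
  def loadProps(path: String): Map[String, String] = {
    val in = new InputStreamReader(new FileInputStream(new File(path)), StandardCharsets.UTF_8)
    try {
      val props = new Properties()
      props.load(in)
      props.asScala.toMap
    } finally {
      in.close()
    }
  }

  def main(args: Array[String]): Unit = {
    val defaults = loadProps("conf/spark-defaults.conf")  // assumed location
    val submitDriverMemory = args.headOption              // stand-in for --driver-memory
    val defaultDriverMemory = "512m"

    // Submit argument > properties file > hard-coded default
    val driverMemory = submitDriverMemory
      .orElse(defaults.get("spark.driver.memory"))
      .getOrElse(defaultDriverMemory)

    println(s"Would launch the driver JVM with -Xms$driverMemory -Xmx$driverMemory")
  }
}

Keeping each config as an Option until the very last step is the point of the refactor: "unset" stays distinct from "set to the empty string", so an absent spark.driver.extraClassPath no longer contributes a dangling path separator or an empty -Djava.library.path= flag.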
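Second, patch 50 makes the resolved driver memory the single source of truth for heap sizing: any -Xms/-Xmx flags that leaked into the collected Java options are filtered out in Scala and replaced with exactly one pair derived from spark.driver.memory. A hedged sketch of the same idea, with hypothetical names and a naive whitespace split where Spark itself uses Utils.splitCommandString (which also understands quoted arguments):

// Hypothetical helper, not the actual SparkSubmitDriverBootstrapper code
object HeapFlagFilterSketch {
  def buildJavaOpts(rawJavaOpts: String, driverMemory: String): Seq[String] = {
    // Naive split: sufficient for unquoted options only
    val opts = rawJavaOpts.split("\\s+").filter(_.nonEmpty).toSeq
    // Drop any pre-existing heap flags, wherever they came from
    val filtered = opts.filterNot(o => o.startsWith("-Xms") || o.startsWith("-Xmx"))
    // Re-append one authoritative pair derived from the resolved driver memory
    filtered ++ Seq(s"-Xms$driverMemory", s"-Xmx$driverMemory")
  }

  def main(args: Array[String]): Unit = {
    val opts = buildJavaOpts("-XX:MaxPermSize=128m -Xms1g -Xmx1g -Dkey=value", "5g")
    println(opts.mkString(" "))  // -XX:MaxPermSize=128m -Dkey=value -Xms5g -Xmx5g
  }
}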
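Finally, the die-on-broken-pipe trick from patch 49 generalizes to any launcher JVM that must not outlive its parent. A minimal, self-contained sketch follows, assuming a wrapper that launches an arbitrary child command; the object is hypothetical, and a plain copy loop stands in for Spark's internal RedirectThread:

import java.io.{InputStream, OutputStream}

// Hypothetical wrapper, not the actual SparkSubmitDriverBootstrapper
object DieOnBrokenPipeSketch {
  // Simplified stand-in for Spark's RedirectThread: copy bytes until EOF
  private def redirect(in: InputStream, out: OutputStream, name: String): Thread = {
    val t = new Thread(name) {
      override def run(): Unit = {
        val buf = new Array[Byte](1024)
        var n = in.read(buf)
        while (n != -1) {
          out.write(buf, 0, n)
          out.flush()
          n = in.read(buf)
        }
      }
    }
    t.setDaemon(true)  // do not keep the JVM alive once the child is gone
    t
  }

  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "Usage: DieOnBrokenPipeSketch <command> [args...]")
    val process = new ProcessBuilder(args: _*).start()

    val stdinThread = redirect(System.in, process.getOutputStream, "redirect stdin")
    redirect(process.getInputStream, System.out, "redirect stdout").start()
    redirect(process.getErrorStream, System.err, "redirect stderr").start()
    stdinThread.start()

    // When the parent that spawned us exits, our stdin hits EOF, the stdin
    // thread finishes, and we tear down the child instead of leaking it.
    stdinThread.join()
    process.destroy()
  }
}

The design choice is the same one the patch describes: stdin EOF is the one portable signal a child reliably receives when its parent goes away, which is why Py4J's "--die-on-broken-pipe" and this bootstrapper both key off it rather than polling the parent's PID.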