Closed
Changes from 1 commit
Commits (32)
ac2d65e
Change spark.local.dir -> SPARK_LOCAL_DIRS
pwendell Mar 31, 2014
0faa3b6
Stash of adding config options in submit script and YARN
pwendell Apr 1, 2014
6eaf7d0
executorJavaOpts
pwendell Apr 1, 2014
4982331
Remove SPARK_LIBRARY_PATH
pwendell Apr 1, 2014
1f75238
SPARK_JAVA_OPTS --> SPARK_MASTER_OPTS for master settings
pwendell Apr 1, 2014
84cc5e5
Small clean-up
pwendell Apr 1, 2014
5b0ba8e
Don't ship executor envs
pwendell Apr 2, 2014
7cc70e4
Clean up terminology inside of spark-env script
pwendell Apr 2, 2014
761ebcd
Library path and classpath for drivers
pwendell Apr 2, 2014
437aed1
Small fix
pwendell Apr 2, 2014
46555c1
Review feedback and import clean-ups
pwendell Apr 13, 2014
b72d183
Review feedback for spark env file
pwendell Apr 13, 2014
ace4ead
Responses to review feedback.
pwendell Apr 13, 2014
b08893b
Additional improvements.
pwendell Apr 13, 2014
afc9ed8
Cleaning up line limits and two compile errors.
pwendell Apr 14, 2014
4ee6f9d
Making YARN doc changes consistent
pwendell Apr 14, 2014
c2a2909
Test compile fixes
pwendell Apr 14, 2014
be42f35
Handle case where SPARK_HOME is not set
pwendell Apr 15, 2014
e83cd8f
Changes to allow re-use of test applications
pwendell Apr 15, 2014
308f1f6
Properly escape quotes and other clean-up for YARN
pwendell Apr 15, 2014
fda0301
Note
pwendell Apr 15, 2014
ffa00fe
Review feedback
pwendell Apr 18, 2014
a762901
Fixing test failures
pwendell Apr 18, 2014
d50c388
Merge remote-tracking branch 'apache/master' into config-cleanup
pwendell Apr 18, 2014
a56b125
Responses to Tom's review
pwendell Apr 18, 2014
af0adf7
Automatically add user jar
pwendell Apr 18, 2014
b16e6a2
Cleanup of spark-submit script and Scala quick start guide
pwendell Apr 20, 2014
af09e3e
Mention config file in docs and clean-up docs
pwendell Apr 21, 2014
0086939
Minor style fixes
pwendell Apr 21, 2014
b4b496c
spark-defaults.properties -> spark-defaults.conf
pwendell Apr 21, 2014
a006464
Moving properties file template.
pwendell Apr 21, 2014
127f301
Improvements to testing
pwendell Apr 21, 2014
Stash of adding config options in submit script and YARN
pwendell committed Apr 13, 2014
commit 0faa3b6ff0e0b37b18fdde3d6a6110459ffd8f28
36 changes: 30 additions & 6 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -17,11 +17,13 @@

package org.apache.spark.deploy

import java.io.{PrintStream, File}
import java.io.{FileInputStream, PrintStream, File}
import java.net.URL
import java.util.Properties

import org.apache.spark.executor.ExecutorURLClassLoader

import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
import scala.collection.mutable.Map
@@ -108,6 +110,21 @@ object SparkSubmit {
val sysProps = new HashMap[String, String]()
var childMainClass = ""

// Load system properties by default from the file, if present
if (appArgs.verbose) printStream.println(s"Using properties file: ${appArgs.propertiesFile}")
Option(appArgs.propertiesFile).map { filename =>
Contributor: foreach

val file = new File(filename)
getDefaultProperties(file).foreach { case (k, v) =>
if (k.startsWith("spark")) {
sysProps(k) = v
if (appArgs.verbose) printStream.println(s"Adding default property: $k=$v")
}
else {
printWarning(s"Ignoring non-spark config property: $k=$v")
}
}
}
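The "foreach" comment above points out that the result of `Option(...).map { ... }` is discarded, so `foreach` states the intent more clearly. A minimal sketch of the same block with that change (behavior unchanged):

```scala
// Same logic as the block above, using foreach since only the side effects matter.
Option(appArgs.propertiesFile).foreach { filename =>
  val file = new File(filename)
  getDefaultProperties(file).foreach { case (k, v) =>
    if (k.startsWith("spark")) {
      sysProps(k) = v
      if (appArgs.verbose) printStream.println(s"Adding default property: $k=$v")
    } else {
      printWarning(s"Ignoring non-spark config property: $k=$v")
    }
  }
}
```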

if (clusterManager == MESOS && deployOnCluster) {
printErrorAndExit("Mesos does not support running the driver on the cluster")
}
@@ -191,11 +208,11 @@
sysProps: Map[String, String], childMainClass: String, verbose: Boolean = false) {

if (verbose) {
System.err.println(s"Main class:\n$childMainClass")
System.err.println(s"Arguments:\n${childArgs.mkString("\n")}")
System.err.println(s"System properties:\n${sysProps.mkString("\n")}")
System.err.println(s"Classpath elements:\n${childClasspath.mkString("\n")}")
System.err.println("\n")
printStream.println(s"Main class:\n$childMainClass")
printStream.println(s"Arguments:\n${childArgs.mkString("\n")}")
printStream.println(s"System properties:\n${sysProps.mkString("\n")}")
printStream.println(s"Classpath elements:\n${childClasspath.mkString("\n")}")
printStream.println("\n")
}

val loader = new ExecutorURLClassLoader(new Array[URL](0),
@@ -224,6 +241,13 @@ object SparkSubmit {
val url = localJarFile.getAbsoluteFile.toURI.toURL
loader.addURL(url)
}

private def getDefaultProperties(file: File): Seq[(String, String)] = {
val inputStream = new FileInputStream(file)
val properties = new Properties()
properties.load(inputStream)
properties.stringPropertyNames().toSeq.map(k => (k, properties(k)))
}
Contributor: Would be good to add a try catch here (or just throw a nice exception)

}
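Picking up the try/catch suggestion above, a rough sketch of a more defensive getDefaultProperties. The exception type and messages are assumptions for illustration, not the code this PR merged:

```scala
// Hypothetical hardening: fail with a clear message and always close the stream.
// Assumes an added `import org.apache.spark.SparkException`.
private def getDefaultProperties(file: File): Seq[(String, String)] = {
  require(file.exists(), s"Properties file $file does not exist")
  val inputStream = new FileInputStream(file)
  try {
    val properties = new Properties()
    properties.load(inputStream)
    properties.stringPropertyNames().toSeq.map(k => (k, properties.getProperty(k)))
  } catch {
    case e: Exception =>
      throw new SparkException(s"Failed to load properties file $file", e)
  } finally {
    inputStream.close()
  }
}
```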

private[spark] class OptionAssigner(val value: String,
SparkSubmitArguments.scala
@@ -18,6 +18,7 @@
package org.apache.spark.deploy

import scala.collection.mutable.ArrayBuffer
import java.io.File

/**
* Parses and encapsulates arguments from the spark-submit script.
@@ -28,6 +29,7 @@ private[spark] class SparkSubmitArguments(args: Array[String]) {
var executorMemory: String = null
var executorCores: String = null
var totalExecutorCores: String = null
var propertiesFile: String = null
var driverMemory: String = null
var driverCores: String = null
var supervise: Boolean = false
@@ -49,6 +51,15 @@ private[spark] class SparkSubmitArguments(args: Array[String]) {
if (args.length == 0) printUsageAndExit(-1)
if (primaryResource == null) SparkSubmit.printErrorAndExit("Must specify a primary resource")
if (mainClass == null) SparkSubmit.printErrorAndExit("Must specify a main class with --class")
if (propertiesFile == null) {
val sparkHome = sys.env("SPARK_HOME") // defined via `spark-class`
val sep = File.separator
val defaultPath = s"${sparkHome}${sep}conf${sep}spark-defaults.properties"
val file = new File(defaultPath)
if (file.exists()) {
propertiesFile = file.getAbsolutePath
}
}
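For context, the defaults file read here is a plain java.util.Properties file containing spark.* keys (non-spark keys are ignored with a warning, as shown in the SparkSubmit changes above). The keys below are only illustrative values a user might set, not properties defined by this commit:

```properties
# Hypothetical conf/spark-defaults.properties
spark.master            spark://master:7077
spark.executor.memory   2g
spark.eventLog.enabled  true
```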

override def toString = {
s"""Parsed arguments:
@@ -57,8 +68,9 @@ private[spark] class SparkSubmitArguments(args: Array[String]) {
| executorMemory $executorMemory
| executorCores $executorCores
| totalExecutorCores $totalExecutorCores
| propertiesFile $propertiesFile
| driverMemory $driverMemory
| drivercores $driverCores
| driverCores $driverCores
| supervise $supervise
| queue $queue
| numExecutors $numExecutors
@@ -122,6 +134,10 @@ private[spark] class SparkSubmitArguments(args: Array[String]) {
driverCores = value
parseOpts(tail)

case ("--properties-file") :: value :: tail =>
Contributor: this isn't being printed in the usage of the spark-submit script.

propertiesFile = value
parseOpts(tail)
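On the usage-text comment above: a hypothetical entry for the spark-submit help output could look like the following; the wording and the surrounding usage-string structure are assumptions, not taken from this PR:

```scala
// Hypothetical fragment to add to the usage message in printUsageAndExit (wording assumed).
val propertiesFileUsage =
  """|  --properties-file FILE       Path to a file from which default Spark properties are read.
     |                               Falls back to conf/spark-defaults.properties when present.
     |""".stripMargin
```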

case ("--supervise") :: tail =>
supervise = true
parseOpts(tail)
ClientBase.scala
@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.util.{Records, Apps}
import org.apache.spark.{Logging, SparkConf}
import org.apache.spark.util.Utils
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.deploy.ExecutorLauncher
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment


@@ -340,8 +341,19 @@ trait ClientBase extends Logging {
JAVA_OPTS += " -XX:CMSIncrementalDutyCycle=10 "
}

if (env.isDefinedAt("SPARK_JAVA_OPTS")) {
JAVA_OPTS += " " + env("SPARK_JAVA_OPTS")

Contributor: Removing support for this is going to fail too many jobs which are currently run via cron; this is going to make things very messy.

Contributor Author: @mridulm we could add this back to make it backwards-compatible and give a warning. Would that make sense?

Can you give examples of what people are setting in SPARK_JAVA_OPTS? Just curious how people are using it. Also, what does cron have to do with it?

Contributor: Warning the user would be great, just not removing support for it :-)
I don't have my scripts at home, but these are used to specify application-specific defines (which won't start with the 'spark' prefix), etc., currently iirc. There are no other ways to do it right now.

Contributor: cron as in periodically run via oozie or just normal cron.
So not manually triggered, and failure of those jobs won't even be noticed for a while (and only after they have already impacted other things).

Contributor Author: Okay, sounds good. If you have examples of what values this is being used for, that would be helpful (e.g. are they setting GC settings, some application-specific system properties, or what?).

Contributor: application-specific defines, -X* config values, etc.

Contributor Author: Thinking a bit more, we have two options here:

(a) make a backwards-incompatible change here, and people have to rewrite their jobs
(b) continue supporting shipping SPARK_JAVA_OPTS from the driver to the executors for the entire 1.X family of Spark releases (i.e. probably years).

I guess we can do (a), but I might give a loud error message here so that users change this.

Contributor Author: Hm, actually I'm not so sure. The existing behavior is really confusing because it means that if SPARK_JAVA_OPTS is set on the executors and the driver... the behavior is basically undefined. It might be worth it to bite the bullet here rather than continue to support this unpredictable behavior for a long time.

if (args.amClass == classOf[ExecutorLauncher].getName) {
// If we are being launched in client mode, forward the spark-conf options
// onto the executor launcher
for ((k, v) <- sparkConf.getAll) {
JAVA_OPTS += s"-D$k=$v"
}
} else {
// If we are being launched in standalone mode, capture and forward any spark
// system properties (e.g. set by spark-class).
for ((k, v) <- sys.props.filterKeys(_.startsWith("spark"))) {
JAVA_OPTS += s"-D$k=$v"
}
}
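One option floated in the thread above is to keep SPARK_JAVA_OPTS working for a while but warn loudly when it is used. Purely as an illustration of that idea, and not what the PR ultimately did, a backwards-compatibility shim at this point might look like:

```scala
// Hypothetical shim: honor SPARK_JAVA_OPTS if still set, but steer users toward the
// new configuration mechanisms. The warning text is an assumption.
sys.env.get("SPARK_JAVA_OPTS").foreach { opts =>
  logWarning("SPARK_JAVA_OPTS is deprecated; set JVM options via spark-submit " +
    "or a Spark defaults properties file instead.")
  JAVA_OPTS += " " + opts
}
```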

if (!localResources.contains(ClientBase.LOG4J_PROP)) {
ExecutorRunnableUtil.scala
@@ -57,8 +57,10 @@ trait ExecutorRunnableUtil extends Logging {
// Set the JVM memory
val executorMemoryString = executorMemory + "m"
JAVA_OPTS += "-Xms" + executorMemoryString + " -Xmx" + executorMemoryString + " "
if (env.isDefinedAt("SPARK_JAVA_OPTS")) {
JAVA_OPTS += env("SPARK_JAVA_OPTS") + " "

/* Pass on Spark properties to the driver. */
for ((k, v) <- sys.props.filterKeys(_.startsWith("spark"))) {
JAVA_OPTS += s"-D$k=$v"
}

JAVA_OPTS += " -Djava.io.tmpdir=" +
YarnClientClusterScheduler.scala
@@ -29,7 +29,7 @@ import org.apache.spark.util.Utils
*/
private[spark] class YarnClientClusterScheduler(sc: SparkContext, conf: Configuration) extends TaskSchedulerImpl(sc) {

def this(sc: SparkContext) = this(sc, new Configuration())
def this(sc: SparkContext) = this(sc, sc.getConf)
Contributor: Maybe I'm missing something here, but doesn't sc.getConf return a SparkConf, not a Hadoop Configuration?
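If the reviewer's reading is right, sc.getConf returns a SparkConf while this auxiliary constructor expects a Hadoop Configuration, so the new line would not type-check as written. Purely as a sketch of a type-correct alternative (a guess at the intent, not necessarily the fix that was adopted):

```scala
// SparkContext#hadoopConfiguration is a Hadoop Configuration derived from the SparkConf,
// so it matches the parameter type expected by the primary constructor.
def this(sc: SparkContext) = this(sc, sc.hadoopConfiguration)
```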


// By default, rack is unknown
override def getRackForHost(hostPort: String): Option[String] = {
YarnClientSchedulerBackend.scala
@@ -19,7 +19,7 @@ package org.apache.spark.scheduler.cluster

import org.apache.hadoop.yarn.api.records.{ApplicationId, YarnApplicationState}
import org.apache.spark.{SparkException, Logging, SparkContext}
import org.apache.spark.deploy.yarn.{Client, ClientArguments}
import org.apache.spark.deploy.yarn.{Client, ClientArguments, ExecutorLauncher}
import org.apache.spark.scheduler.TaskSchedulerImpl

import scala.collection.mutable.ArrayBuffer
@@ -54,7 +54,7 @@ private[spark] class YarnClientSchedulerBackend(
"--class", "notused",
"--jar", null,
"--args", hostport,
"--am-class", "org.apache.spark.deploy.yarn.ExecutorLauncher"
"--am-class", classOf[ExecutorLauncher].getName
)

// process any optional arguments, given either as environment variables
ExecutorLauncher.scala
@@ -237,7 +237,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
}

def finishApplicationMaster(status: FinalApplicationStatus) {
logInfo("finish ApplicationMaster with " + status)
logInfo("finish ApplicationEMaster with " + status)
amClient.unregisterApplicationMaster(status, "" /* appMessage */ , "" /* appTrackingUrl */)
}
