Commit 6023c8b

Merge remote-tracking branch 'upstream/master' into orderby-agg
2 parents: 8f73c40 + 47058ca

113 files changed: +1072 / -1052 lines


R/pkg/R/deserialize.R

Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@ readString <- function(con) {
   raw <- readBin(con, raw(), stringLen, endian = "big")
   string <- rawToChar(raw)
   Encoding(string) <- "UTF-8"
-  string
+  string
 }

 readInt <- function(con) {

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 9 additions & 4 deletions

@@ -319,9 +319,6 @@ object SparkSubmit {

     // The following modes are not supported or applicable
     (clusterManager, deployMode) match {
-      case (MESOS, CLUSTER) if args.isPython =>
-        printErrorAndExit("Cluster deploy mode is currently not supported for python " +
-          "applications on Mesos clusters.")
       case (MESOS, CLUSTER) if args.isR =>
         printErrorAndExit("Cluster deploy mode is currently not supported for R " +
           "applications on Mesos clusters.")

@@ -554,7 +551,15 @@
     if (isMesosCluster) {
       assert(args.useRest, "Mesos cluster mode is only supported through the REST submission API")
       childMainClass = "org.apache.spark.deploy.rest.RestSubmissionClient"
-      childArgs += (args.primaryResource, args.mainClass)
+      if (args.isPython) {
+        // Second argument is main class
+        childArgs += (args.primaryResource, "")
+        if (args.pyFiles != null) {
+          sysProps("spark.submit.pyFiles") = args.pyFiles
+        }
+      } else {
+        childArgs += (args.primaryResource, args.mainClass)
+      }
       if (args.childArgs != null) {
         childArgs ++= args.childArgs
       }
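
For context, a minimal sketch of the argument layout this change produces for a Python application submitted to a Mesos cluster. The resource and py-files URIs are invented for the example, and `++= Seq(...)` stands in for the two-argument `+=` used in SparkSubmit; this is an illustration, not the commit's code.

  import scala.collection.mutable.{ArrayBuffer, Map => MutableMap}

  // Hypothetical values; in SparkSubmit these come from the parsed arguments.
  val primaryResource = "http://example.com/app.py"
  val pyFiles: String = "http://example.com/deps.py"
  val isPython = true
  val mainClass = ""                      // a Python app has no JVM main class

  val childArgs = ArrayBuffer[String]()
  val sysProps = MutableMap[String, String]()

  if (isPython) {
    // The empty second argument fills the main-class slot; the cluster scheduler
    // later treats an empty main class as "this driver is running Python".
    childArgs ++= Seq(primaryResource, "")
    if (pyFiles != null) {
      sysProps("spark.submit.pyFiles") = pyFiles
    }
  } else {
    childArgs ++= Seq(primaryResource, mainClass)
  }

  println(childArgs)   // ArrayBuffer(http://example.com/app.py, )  -- empty main-class slot
  println(sysProps)    // Map(spark.submit.pyFiles -> http://example.com/deps.py)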

core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala

Lines changed: 30 additions & 12 deletions

@@ -29,7 +29,6 @@ import org.apache.mesos.Protos.Environment.Variable
 import org.apache.mesos.Protos.TaskStatus.Reason
 import org.apache.mesos.Protos.{TaskState => MesosTaskState, _}
 import org.apache.mesos.{Scheduler, SchedulerDriver}
-
 import org.apache.spark.deploy.mesos.MesosDriverDescription
 import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KillSubmissionResponse, SubmissionStatusResponse}
 import org.apache.spark.metrics.MetricsSystem

@@ -375,21 +374,20 @@ private[spark] class MesosClusterScheduler(
     val executorOpts = desc.schedulerProperties.map { case (k, v) => s"-D$k=$v" }.mkString(" ")
     envBuilder.addVariables(
       Variable.newBuilder().setName("SPARK_EXECUTOR_OPTS").setValue(executorOpts))
-    val cmdOptions = generateCmdOption(desc).mkString(" ")
     val dockerDefined = desc.schedulerProperties.contains("spark.mesos.executor.docker.image")
     val executorUri = desc.schedulerProperties.get("spark.executor.uri")
       .orElse(desc.command.environment.get("SPARK_EXECUTOR_URI"))
-    val appArguments = desc.command.arguments.mkString(" ")
-    val (executable, jar) = if (dockerDefined) {
+    // Gets the path to run spark-submit, and the path to the Mesos sandbox.
+    val (executable, sandboxPath) = if (dockerDefined) {
       // Application jar is automatically downloaded in the mounted sandbox by Mesos,
       // and the path to the mounted volume is stored in $MESOS_SANDBOX env variable.
-      ("./bin/spark-submit", s"$$MESOS_SANDBOX/${desc.jarUrl.split("/").last}")
+      ("./bin/spark-submit", "$MESOS_SANDBOX")
     } else if (executorUri.isDefined) {
       builder.addUris(CommandInfo.URI.newBuilder().setValue(executorUri.get).build())
       val folderBasename = executorUri.get.split('/').last.split('.').head
       val cmdExecutable = s"cd $folderBasename*; $prefixEnv bin/spark-submit"
-      val cmdJar = s"../${desc.jarUrl.split("/").last}"
-      (cmdExecutable, cmdJar)
+      // Sandbox path points to the parent folder as we chdir into the folderBasename.
+      (cmdExecutable, "..")
     } else {
       val executorSparkHome = desc.schedulerProperties.get("spark.mesos.executor.home")
         .orElse(conf.getOption("spark.home"))

@@ -398,30 +396,50 @@ private[spark] class MesosClusterScheduler(
         throw new SparkException("Executor Spark home `spark.mesos.executor.home` is not set!")
       }
       val cmdExecutable = new File(executorSparkHome, "./bin/spark-submit").getCanonicalPath
-      val cmdJar = desc.jarUrl.split("/").last
-      (cmdExecutable, cmdJar)
+      // Sandbox points to the current directory by default with Mesos.
+      (cmdExecutable, ".")
     }
-    builder.setValue(s"$executable $cmdOptions $jar $appArguments")
+    val primaryResource = new File(sandboxPath, desc.jarUrl.split("/").last).toString()
+    val cmdOptions = generateCmdOption(desc, sandboxPath).mkString(" ")
+    val appArguments = desc.command.arguments.mkString(" ")
+    builder.setValue(s"$executable $cmdOptions $primaryResource $appArguments")
     builder.setEnvironment(envBuilder.build())
     conf.getOption("spark.mesos.uris").map { uris =>
       setupUris(uris, builder)
     }
+    desc.schedulerProperties.get("spark.mesos.uris").map { uris =>
+      setupUris(uris, builder)
+    }
+    desc.schedulerProperties.get("spark.submit.pyFiles").map { pyFiles =>
+      setupUris(pyFiles, builder)
+    }
     builder.build()
   }

-  private def generateCmdOption(desc: MesosDriverDescription): Seq[String] = {
+  private def generateCmdOption(desc: MesosDriverDescription, sandboxPath: String): Seq[String] = {
     var options = Seq(
       "--name", desc.schedulerProperties("spark.app.name"),
-      "--class", desc.command.mainClass,
       "--master", s"mesos://${conf.get("spark.master")}",
       "--driver-cores", desc.cores.toString,
       "--driver-memory", s"${desc.mem}M")
+
+    // Assume empty main class means we're running python
+    if (!desc.command.mainClass.equals("")) {
+      options ++= Seq("--class", desc.command.mainClass)
+    }
+
     desc.schedulerProperties.get("spark.executor.memory").map { v =>
       options ++= Seq("--executor-memory", v)
     }
     desc.schedulerProperties.get("spark.cores.max").map { v =>
       options ++= Seq("--total-executor-cores", v)
     }
+    desc.schedulerProperties.get("spark.submit.pyFiles").map { pyFiles =>
+      val formattedFiles = pyFiles.split(",")
+        .map { path => new File(sandboxPath, path.split("/").last).toString() }
+        .mkString(",")
+      options ++= Seq("--py-files", formattedFiles)
+    }
     options
   }
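
As a rough illustration of the `--py-files` rewriting in `generateCmdOption` above, each listed file keeps only its basename and is joined to the sandbox path, since Mesos fetches the URIs into the sandbox before the driver starts. The URIs below are invented for the example; the real values come from `spark.submit.pyFiles` in the driver description.

  import java.io.File

  // Invented inputs for the sketch; sandboxPath is "$MESOS_SANDBOX" in the docker
  // case from the diff, and "." or ".." in the other branches.
  val sandboxPath = "$MESOS_SANDBOX"
  val pyFiles = "http://repo.example.com/libs/util.py,hdfs:///tools/extra.py"

  val formattedFiles = pyFiles.split(",")
    .map { path => new File(sandboxPath, path.split("/").last).toString() }
    .mkString(",")

  // Prints: --py-files $MESOS_SANDBOX/util.py,$MESOS_SANDBOX/extra.py
  println(s"--py-files $formattedFiles")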

core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala

Lines changed: 1 addition & 0 deletions

@@ -812,6 +812,7 @@ class DAGSchedulerSuite
     }

     // The map stage should have been submitted.
+    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
     assert(countSubmittedMapStageAttempts() === 1)

     complete(taskSets(0), Seq(

docs/running-on-mesos.md

Lines changed: 2 additions & 0 deletions

@@ -157,6 +157,8 @@ From the client, you can submit a job to Mesos cluster by running `spark-submit`
 to the url of the MesosClusterDispatcher (e.g: mesos://dispatcher:7077). You can view driver statuses on the
 Spark cluster Web UI.

+Note that jars or python files that are passed to spark-submit should be URIs reachable by Mesos slaves.
+
 # Mesos Run Modes

 Spark can run over Mesos in two modes: "fine-grained" (default) and "coarse-grained".

mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
   with HasSeed with HasMaxIter with HasTol {
   /**
    * Layer sizes including input size and output size.
+   * Default: Array(1, 1)
    * @group param
    */
   final val layers: IntArrayParam = new IntArrayParam(this, "layers",

@@ -50,6 +51,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
    * Data is stacked within partitions. If block size is more than remaining data in
    * a partition then it is adjusted to the size of this data.
    * Recommended size is between 10 and 1000.
+   * Default: 128
    * @group expertParam
    */
   final val blockSize: IntParam = new IntParam(this, "blockSize",
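
For reference, a minimal usage sketch of the two parameters whose defaults are documented above; the layer sizes here are example values, not the defaults.

  import org.apache.spark.ml.classification.MultilayerPerceptronClassifier

  // layers defaults to Array(1, 1) and blockSize to 128; any real model overrides
  // layers so the first entry matches the feature count and the last the class count.
  val mlp = new MultilayerPerceptronClassifier()
    .setLayers(Array(4, 5, 3))   // example: 4 features, one hidden layer of 5, 3 classes
    .setBlockSize(128)           // the documented default, set explicitly here
    .setMaxIter(100)
    .setSeed(1234L)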

mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala

Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@ class BinaryClassificationEvaluator(override val uid: String)

   /**
    * param for metric name in evaluation
+   * Default: areaUnderROC
    * @group param
    */
   val metricName: Param[String] = {
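
A short usage sketch of the documented default; the column names are illustrative and the metric override is only to show the setter.

  import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

  // metricName defaults to "areaUnderROC"; "areaUnderPR" is the other accepted value.
  val evaluator = new BinaryClassificationEvaluator()
    .setRawPredictionCol("rawPrediction")
    .setLabelCol("label")
    .setMetricName("areaUnderPR")   // example override of the default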

mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala

Lines changed: 1 addition & 0 deletions

@@ -41,6 +41,7 @@ final class Binarizer(override val uid: String)
    * Param for threshold used to binarize continuous features.
    * The features greater than the threshold, will be binarized to 1.0.
    * The features equal to or less than the threshold, will be binarized to 0.0.
+   * Default: 0.0
    * @group param
    */
   val threshold: DoubleParam =
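
A brief sketch of the threshold parameter in use; the column names and the 0.5 override are example values.

  import org.apache.spark.ml.feature.Binarizer

  // With the documented default threshold of 0.0, strictly positive features map to 1.0
  // and everything else to 0.0; the override below is only an example.
  val binarizer = new Binarizer()
    .setInputCol("feature")
    .setOutputCol("binarized_feature")
    .setThreshold(0.5)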

mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala

Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol

   /**
    * The minimum of documents in which a term should appear.
+   * Default: 0
    * @group param
    */
   final val minDocFreq = new IntParam(
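
A short sketch of the documented default in use; the column names and the value 2 are example choices, not defaults.

  import org.apache.spark.ml.feature.IDF

  // minDocFreq defaults to 0; raising it drops terms that appear in too few documents.
  val idf = new IDF()
    .setInputCol("rawFeatures")
    .setOutputCol("features")
    .setMinDocFreq(2)   // example override of the default 0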

mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala

Lines changed: 2 additions & 0 deletions

@@ -98,6 +98,7 @@ class StopWordsRemover(override val uid: String)

   /**
    * the stop words set to be filtered out
+   * Default: [[StopWords.English]]
    * @group param
    */
   val stopWords: StringArrayParam = new StringArrayParam(this, "stopWords", "stop words")

@@ -110,6 +111,7 @@ class StopWordsRemover(override val uid: String)

   /**
    * whether to do a case sensitive comparison over the stop words
+   * Default: false
    * @group param
    */
   val caseSensitive: BooleanParam = new BooleanParam(this, "caseSensitive",
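
A minimal usage sketch of the two parameters whose defaults are documented above; the column names and the custom stop-word list are example values.

  import org.apache.spark.ml.feature.StopWordsRemover

  // stopWords defaults to StopWords.English and caseSensitive to false, as documented;
  // both setters are shown here with illustrative values.
  val remover = new StopWordsRemover()
    .setInputCol("raw")
    .setOutputCol("filtered")
    .setStopWords(Array("a", "an", "the"))   // example custom list
    .setCaseSensitive(false)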
